// Vita3K emulator project
// Copyright (C) 2025 Vita3K team
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

#include "SceGxm.h"

#include <modules/module_parent.h>

#include <span>
#include <stack>
#if defined(__x86_64__) && !defined(__APPLE__)
#include <xxh_x86dispatch.h>
#else
#define XXH_INLINE_ALL
#include <xxhash.h>
#endif

#include <display/functions.h>
#include <display/state.h>
#include <gxm/functions.h>
#include <gxm/state.h>
#include <gxm/types.h>
#include <kernel/state.h>
#include <mem/state.h>

#include <io/state.h>
#include <mem/mempool.h>
#include <renderer/functions.h>
#include <renderer/state.h>
#include <renderer/types.h>
#include <util/bytes.h>
#include <util/log.h>

#include <util/tracy.h>
TRACY_MODULE_NAME(SceGxm);

// Debug-string conversion for SceGxmColorFormat: yields the enumerator's name.
// A value with no matching case below is rendered as its number by
// std::to_string. `mem` is not used by this specialization.
template <>
std::string to_debug_str<SceGxmColorFormat>(const MemState &mem, SceGxmColorFormat type) {
// Expands to one case label that returns the enumerator spelled out as text,
// so the label and the printed name always come from the same token.
#define GXM_COLOR_FORMAT_NAME(format) \
    case format: return #format;

    switch (type) {
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8U8U8U8_ABGR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8U8U8U8_ARGB)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8U8U8U8_RGBA)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8U8U8U8_BGRA)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8U8U8_BGR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8U8U8_RGB)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U5U6U5_BGR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U5U6U5_RGB)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U1U5U5U5_ABGR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U1U5U5U5_ARGB)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U5U5U5U1_RGBA)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U5U5U5U1_BGRA)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U4U4U4U4_ABGR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U4U4U4U4_ARGB)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U4U4U4U4_RGBA)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U4U4U4U4_BGRA)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8U3U3U2_ARGB)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_F16_R)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_F16_G)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_F16F16_GR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_F16F16_RG)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_F32_R)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S16_R)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S16_G)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S16S16_GR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S16S16_RG)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U16_R)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U16_G)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U16U16_GR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U16U16_RG)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U2U10U10U10_ABGR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U2U10U10U10_ARGB)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U10U10U10U2_RGBA)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U10U10U10U2_BGRA)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8_R)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8_A)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S8_R)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S8_A)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U6S5S5_BGR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S5S5U6_RGB)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8U8_GR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8U8_RG)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8U8_RA)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8U8_AR)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S8S8_GR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S8S8_RG)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S8S8_RA)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S8S8_AR)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8S8S8U8_ABGR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8U8S8S8_ARGB)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U8S8S8U8_RGBA)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S8S8U8U8_BGRA)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S8S8S8S8_ABGR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S8S8S8S8_ARGB)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S8S8S8S8_RGBA)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_S8S8S8S8_BGRA)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_F16F16F16F16_ABGR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_F16F16F16F16_ARGB)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_F16F16F16F16_RGBA)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_F16F16F16F16_BGRA)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_F32F32_GR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_F32F32_RG)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_F10F11F11_BGR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_F11F11F10_RGB)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_SE5M9M9M9_BGR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_SE5M9M9M9_RGB)

        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U2F10F10F10_ABGR)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_U2F10F10F10_ARGB)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_F10F10F10U2_RGBA)
        GXM_COLOR_FORMAT_NAME(SCE_GXM_COLOR_FORMAT_F10F10F10U2_BGRA)
    }

#undef GXM_COLOR_FORMAT_NAME

    // No case matched: fall back to the numeric value.
    return std::to_string(type);
}

// Debug-string conversion for SceGxmColorSurfaceType: yields the enumerator's
// name, or the numeric value (via std::to_string) when no case matches.
// `mem` is not used by this specialization.
template <>
std::string to_debug_str<SceGxmColorSurfaceType>(const MemState &mem, SceGxmColorSurfaceType type) {
    const char *name = nullptr;

    switch (type) {
    case SCE_GXM_COLOR_SURFACE_LINEAR:
        name = "SCE_GXM_COLOR_SURFACE_LINEAR";
        break;
    case SCE_GXM_COLOR_SURFACE_TILED:
        name = "SCE_GXM_COLOR_SURFACE_TILED";
        break;
    case SCE_GXM_COLOR_SURFACE_SWIZZLED:
        name = "SCE_GXM_COLOR_SURFACE_SWIZZLED";
        break;
    }

    // name stays null only when no case above matched.
    if (name != nullptr)
        return name;
    return std::to_string(type);
}

// Debug-string conversion for SceGxmColorSurfaceScaleMode: yields the
// enumerator's name, or the numeric value (via std::to_string) for any value
// that is neither of the two handled enumerators. `mem` is not used.
template <>
std::string to_debug_str<SceGxmColorSurfaceScaleMode>(const MemState &mem, SceGxmColorSurfaceScaleMode type) {
    if (type == SCE_GXM_COLOR_SURFACE_SCALE_NONE)
        return "SCE_GXM_COLOR_SURFACE_SCALE_NONE";

    if (type == SCE_GXM_COLOR_SURFACE_SCALE_MSAA_DOWNSCALE)
        return "SCE_GXM_COLOR_SURFACE_SCALE_MSAA_DOWNSCALE";

    return std::to_string(type);
}

// Debug-string conversion for SceGxmOutputRegisterSize: yields the
// enumerator's name, or the numeric value (via std::to_string) for any value
// that is neither of the two handled enumerators. `mem` is not used.
template <>
std::string to_debug_str<SceGxmOutputRegisterSize>(const MemState &mem, SceGxmOutputRegisterSize type) {
    if (type == SCE_GXM_OUTPUT_REGISTER_SIZE_32BIT)
        return "SCE_GXM_OUTPUT_REGISTER_SIZE_32BIT";

    if (type == SCE_GXM_OUTPUT_REGISTER_SIZE_64BIT)
        return "SCE_GXM_OUTPUT_REGISTER_SIZE_64BIT";

    return std::to_string(type);
}

// Debug-string conversion for SceGxmColorSurfaceDitherMode: yields the
// enumerator's name, or the numeric value (via std::to_string) when no case
// matches. `mem` is not used by this specialization.
template <>
std::string to_debug_str<SceGxmColorSurfaceDitherMode>(const MemState &mem, SceGxmColorSurfaceDitherMode type) {
    const char *name = nullptr;

    switch (type) {
    case SCE_GXM_COLOR_SURFACE_DITHER_DISABLED:
        name = "SCE_GXM_COLOR_SURFACE_DITHER_DISABLED";
        break;
    case SCE_GXM_COLOR_SURFACE_DITHER_ENABLED:
        name = "SCE_GXM_COLOR_SURFACE_DITHER_ENABLED";
        break;
    }

    // name stays null only when no case above matched.
    if (name != nullptr)
        return name;
    return std::to_string(type);
}

// Debug-string conversion for SceGxmTextureFormat: yields the enumerator's name.
// A value with no matching case below is rendered as its number by
// std::to_string. `mem` is not used by this specialization.
template <>
std::string to_debug_str<SceGxmTextureFormat>(const MemState &mem, SceGxmTextureFormat type) {
// Expands to one case label that returns the enumerator spelled out as text,
// so the label and the printed name always come from the same token.
#define GXM_TEXTURE_FORMAT_NAME(format) \
    case format: return #format;

    switch (type) {
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8_000R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8_111R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8_RRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8_0RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8_1RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8_R000)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8_R111)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8_R)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8_000R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8_111R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8_RRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8_0RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8_1RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8_R000)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8_R111)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8_R)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U4U4U4U4_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U4U4U4U4_ARGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U4U4U4U4_RGBA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U4U4U4U4_BGRA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X4U4U4U4_1BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X4U4U4U4_1RGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U4U4U4X4_RGB1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U4U4U4X4_BGR1)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8U3U3U2_ARGB)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U1U5U5U5_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U1U5U5U5_ARGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U5U5U5U1_RGBA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U5U5U5U1_BGRA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X1U5U5U5_1BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X1U5U5U5_1RGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U5U5U5X1_RGB1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U5U5U5X1_BGR1)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U5U6U5_BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U5U6U5_RGB)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U6S5S5_BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S5S5U6_RGB)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8U8_00GR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8U8_GRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8U8_RGGG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8U8_GRGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8U8_00RG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8U8_GR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8S8_00GR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8S8_GRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8S8_RGGG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8S8_GRGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8S8_00RG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8S8_GR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16_000R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16_111R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16_RRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16_0RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16_1RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16_R000)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16_R111)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16_R)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16_000R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16_111R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16_RRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16_0RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16_1RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16_R000)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16_R111)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16_R)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16_000R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16_111R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16_RRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16_0RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16_1RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16_R000)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16_R111)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16_R)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8U8U8U8_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8U8U8U8_ARGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8U8U8U8_RGBA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8U8U8U8_BGRA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X8U8U8U8_1BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X8U8U8U8_1RGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8U8U8X8_RGB1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8U8U8X8_BGR1)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8S8S8S8_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8S8S8S8_ARGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8S8S8S8_RGBA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8S8S8S8_BGRA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X8S8S8S8_1BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X8S8S8S8_1RGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8S8S8X8_RGB1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8S8S8X8_BGR1)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U2U10U10U10_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U2U10U10U10_ARGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U10U10U10U2_RGBA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U10U10U10U2_BGRA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X2U10U10U10_1BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X2U10U10U10_1RGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U10U10U10X2_RGB1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U10U10U10X2_BGR1)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16U16_00GR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16U16_GRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16U16_RGGG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16U16_GRGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16U16_00RG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16U16_GR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16S16_00GR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16S16_GRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16S16_RGGG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16S16_GRGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16S16_00RG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16S16_GR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16F16_00GR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16F16_GRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16F16_RGGG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16F16_GRGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16F16_00RG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16F16_GR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32_000R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32_111R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32_RRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32_0RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32_1RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32_R000)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32_R111)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32_R)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32M_000R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32M_111R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32M_RRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32M_0RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32M_1RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32M_R000)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32M_R111)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32M_R)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X8S8S8U8_1BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X8U8S8S8_1RGB)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X8U24_SD)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U24X8_DS)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U32_000R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U32_111R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U32_RRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U32_0RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U32_1RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U32_R000)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U32_R111)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U32_R)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S32_000R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S32_111R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S32_RRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S32_0RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S32_1RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S32_R000)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S32_R111)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S32_R)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SE5M9M9M9_BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SE5M9M9M9_RGB)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F10F11F11_BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F11F11F10_RGB)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16F16F16F16_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16F16F16F16_ARGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16F16F16F16_RGBA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16F16F16F16_BGRA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X16F16F16F16_1BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X16F16F16F16_1RGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16F16F16X16_RGB1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F16F16F16X16_BGR1)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16U16U16U16_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16U16U16U16_ARGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16U16U16U16_RGBA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16U16U16U16_BGRA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X16U16U16U16_1BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X16U16U16U16_1RGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16U16U16X16_RGB1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U16U16U16X16_BGR1)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16S16S16S16_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16S16S16S16_ARGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16S16S16S16_RGBA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16S16S16S16_BGRA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X16S16S16S16_1BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X16S16S16S16_1RGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16S16S16X16_RGB1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S16S16S16X16_BGR1)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32F32_00GR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32F32_GRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32F32_RGGG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32F32_GRGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32F32_00RG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F32F32_GR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U32U32_00GR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U32U32_GRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U32U32_RGGG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U32U32_GRGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U32U32_00RG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U32U32_GR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_PVRT2BPP_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_PVRT2BPP_1BGR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_PVRT4BPP_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_PVRT4BPP_1BGR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_PVRTII2BPP_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_PVRTII2BPP_1BGR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_PVRTII4BPP_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_PVRTII4BPP_1BGR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC1_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC1_1BGR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC2_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC2_1BGR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC3_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC3_1BGR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC4_000R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC4_111R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC4_RRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC4_0RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC4_1RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC4_R000)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC4_R111)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC4_R)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SBC4_000R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SBC4_111R)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SBC4_RRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SBC4_0RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SBC4_1RRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SBC4_R000)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SBC4_R111)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SBC4_R)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC5_00GR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC5_GRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC5_RGGG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC5_GRGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC5_00RG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UBC5_GR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SBC5_00GR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SBC5_GRRR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SBC5_RGGG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SBC5_GRGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SBC5_00RG)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_SBC5_GR)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_YUV420P2_CSC0)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_YVU420P2_CSC0)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_YUV420P2_CSC1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_YVU420P2_CSC1)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_YUV420P3_CSC0)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_YVU420P3_CSC0)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_YUV420P3_CSC1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_YVU420P3_CSC1)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_YUYV422_CSC0)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_YVYU422_CSC0)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UYVY422_CSC0)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_VYUY422_CSC0)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_YUYV422_CSC1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_YVYU422_CSC1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_UYVY422_CSC1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_VYUY422_CSC1)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P4_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P4_ARGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P4_RGBA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P4_BGRA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P4_1BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P4_1RGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P4_RGB1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P4_BGR1)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P8_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P8_ARGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P8_RGBA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P8_BGRA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P8_1BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P8_1RGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P8_RGB1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_P8_BGR1)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8U8U8_BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U8U8U8_RGB)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8S8S8_BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_S8S8S8_RGB)

        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U2F10F10F10_ABGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_U2F10F10F10_ARGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F10F10F10U2_RGBA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F10F10F10U2_BGRA)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X2F10F10F10_1BGR)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_X2F10F10F10_1RGB)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F10F10F10X2_RGB1)
        GXM_TEXTURE_FORMAT_NAME(SCE_GXM_TEXTURE_FORMAT_F10F10F10X2_BGR1)
    }

#undef GXM_TEXTURE_FORMAT_NAME

    // No case matched: fall back to the numeric value.
    return std::to_string(type);
}

// Debug name of a SceGxmColorSurfaceGammaMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmColorSurfaceGammaMode>(const MemState &mem, SceGxmColorSurfaceGammaMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_COLOR_SURFACE_GAMMA_NONE: name = "SCE_GXM_COLOR_SURFACE_GAMMA_NONE"; break;
    case SCE_GXM_COLOR_SURFACE_GAMMA_GR: name = "SCE_GXM_COLOR_SURFACE_GAMMA_GR"; break;
    case SCE_GXM_COLOR_SURFACE_GAMMA_BGR: name = "SCE_GXM_COLOR_SURFACE_GAMMA_BGR"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmDepthStencilFormat value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmDepthStencilFormat>(const MemState &mem, SceGxmDepthStencilFormat type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_DEPTH_STENCIL_FORMAT_DF32: name = "SCE_GXM_DEPTH_STENCIL_FORMAT_DF32"; break;
    case SCE_GXM_DEPTH_STENCIL_FORMAT_S8: name = "SCE_GXM_DEPTH_STENCIL_FORMAT_S8"; break;
    case SCE_GXM_DEPTH_STENCIL_FORMAT_DF32_S8: name = "SCE_GXM_DEPTH_STENCIL_FORMAT_DF32_S8"; break;
    case SCE_GXM_DEPTH_STENCIL_FORMAT_DF32M: name = "SCE_GXM_DEPTH_STENCIL_FORMAT_DF32M"; break;
    case SCE_GXM_DEPTH_STENCIL_FORMAT_DF32M_S8: name = "SCE_GXM_DEPTH_STENCIL_FORMAT_DF32M_S8"; break;
    case SCE_GXM_DEPTH_STENCIL_FORMAT_S8D24: name = "SCE_GXM_DEPTH_STENCIL_FORMAT_S8D24"; break;
    case SCE_GXM_DEPTH_STENCIL_FORMAT_D16: name = "SCE_GXM_DEPTH_STENCIL_FORMAT_D16"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmDepthStencilSurfaceType value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmDepthStencilSurfaceType>(const MemState &mem, SceGxmDepthStencilSurfaceType type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_DEPTH_STENCIL_SURFACE_LINEAR: name = "SCE_GXM_DEPTH_STENCIL_SURFACE_LINEAR"; break;
    case SCE_GXM_DEPTH_STENCIL_SURFACE_TILED: name = "SCE_GXM_DEPTH_STENCIL_SURFACE_TILED"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmDepthStencilForceLoadMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmDepthStencilForceLoadMode>(const MemState &mem, SceGxmDepthStencilForceLoadMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_DEPTH_STENCIL_FORCE_LOAD_DISABLED: name = "SCE_GXM_DEPTH_STENCIL_FORCE_LOAD_DISABLED"; break;
    case SCE_GXM_DEPTH_STENCIL_FORCE_LOAD_ENABLED: name = "SCE_GXM_DEPTH_STENCIL_FORCE_LOAD_ENABLED"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmDepthStencilForceStoreMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmDepthStencilForceStoreMode>(const MemState &mem, SceGxmDepthStencilForceStoreMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_DEPTH_STENCIL_FORCE_STORE_DISABLED: name = "SCE_GXM_DEPTH_STENCIL_FORCE_STORE_DISABLED"; break;
    case SCE_GXM_DEPTH_STENCIL_FORCE_STORE_ENABLED: name = "SCE_GXM_DEPTH_STENCIL_FORCE_STORE_ENABLED"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmPrimitiveType value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmPrimitiveType>(const MemState &mem, SceGxmPrimitiveType type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_PRIMITIVE_TRIANGLES: name = "SCE_GXM_PRIMITIVE_TRIANGLES"; break;
    case SCE_GXM_PRIMITIVE_LINES: name = "SCE_GXM_PRIMITIVE_LINES"; break;
    case SCE_GXM_PRIMITIVE_POINTS: name = "SCE_GXM_PRIMITIVE_POINTS"; break;
    case SCE_GXM_PRIMITIVE_TRIANGLE_STRIP: name = "SCE_GXM_PRIMITIVE_TRIANGLE_STRIP"; break;
    case SCE_GXM_PRIMITIVE_TRIANGLE_FAN: name = "SCE_GXM_PRIMITIVE_TRIANGLE_FAN"; break;
    case SCE_GXM_PRIMITIVE_TRIANGLE_EDGES: name = "SCE_GXM_PRIMITIVE_TRIANGLE_EDGES"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmIndexFormat value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmIndexFormat>(const MemState &mem, SceGxmIndexFormat type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_INDEX_FORMAT_U16: name = "SCE_GXM_INDEX_FORMAT_U16"; break;
    case SCE_GXM_INDEX_FORMAT_U32: name = "SCE_GXM_INDEX_FORMAT_U32"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmParameterSemantic value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmParameterSemantic>(const MemState &mem, SceGxmParameterSemantic type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_PARAMETER_SEMANTIC_NONE: name = "SCE_GXM_PARAMETER_SEMANTIC_NONE"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_ATTR: name = "SCE_GXM_PARAMETER_SEMANTIC_ATTR"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_BCOL: name = "SCE_GXM_PARAMETER_SEMANTIC_BCOL"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_BINORMAL: name = "SCE_GXM_PARAMETER_SEMANTIC_BINORMAL"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_BLENDINDICES: name = "SCE_GXM_PARAMETER_SEMANTIC_BLENDINDICES"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_BLENDWEIGHT: name = "SCE_GXM_PARAMETER_SEMANTIC_BLENDWEIGHT"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_COLOR: name = "SCE_GXM_PARAMETER_SEMANTIC_COLOR"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_DIFFUSE: name = "SCE_GXM_PARAMETER_SEMANTIC_DIFFUSE"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_FOGCOORD: name = "SCE_GXM_PARAMETER_SEMANTIC_FOGCOORD"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_NORMAL: name = "SCE_GXM_PARAMETER_SEMANTIC_NORMAL"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_POINTSIZE: name = "SCE_GXM_PARAMETER_SEMANTIC_POINTSIZE"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_POSITION: name = "SCE_GXM_PARAMETER_SEMANTIC_POSITION"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_SPECULAR: name = "SCE_GXM_PARAMETER_SEMANTIC_SPECULAR"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_TANGENT: name = "SCE_GXM_PARAMETER_SEMANTIC_TANGENT"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_TEXCOORD: name = "SCE_GXM_PARAMETER_SEMANTIC_TEXCOORD"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_INDEX: name = "SCE_GXM_PARAMETER_SEMANTIC_INDEX"; break;
    case SCE_GXM_PARAMETER_SEMANTIC_INSTANCE: name = "SCE_GXM_PARAMETER_SEMANTIC_INSTANCE"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmDepthFunc value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmDepthFunc>(const MemState &mem, SceGxmDepthFunc type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_DEPTH_FUNC_NEVER: name = "SCE_GXM_DEPTH_FUNC_NEVER"; break;
    case SCE_GXM_DEPTH_FUNC_LESS: name = "SCE_GXM_DEPTH_FUNC_LESS"; break;
    case SCE_GXM_DEPTH_FUNC_EQUAL: name = "SCE_GXM_DEPTH_FUNC_EQUAL"; break;
    case SCE_GXM_DEPTH_FUNC_LESS_EQUAL: name = "SCE_GXM_DEPTH_FUNC_LESS_EQUAL"; break;
    case SCE_GXM_DEPTH_FUNC_GREATER: name = "SCE_GXM_DEPTH_FUNC_GREATER"; break;
    case SCE_GXM_DEPTH_FUNC_NOT_EQUAL: name = "SCE_GXM_DEPTH_FUNC_NOT_EQUAL"; break;
    case SCE_GXM_DEPTH_FUNC_GREATER_EQUAL: name = "SCE_GXM_DEPTH_FUNC_GREATER_EQUAL"; break;
    case SCE_GXM_DEPTH_FUNC_ALWAYS: name = "SCE_GXM_DEPTH_FUNC_ALWAYS"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmDepthWriteMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmDepthWriteMode>(const MemState &mem, SceGxmDepthWriteMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_DEPTH_WRITE_DISABLED: name = "SCE_GXM_DEPTH_WRITE_DISABLED"; break;
    case SCE_GXM_DEPTH_WRITE_ENABLED: name = "SCE_GXM_DEPTH_WRITE_ENABLED"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmFragmentProgramMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmFragmentProgramMode>(const MemState &mem, SceGxmFragmentProgramMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_FRAGMENT_PROGRAM_DISABLED: name = "SCE_GXM_FRAGMENT_PROGRAM_DISABLED"; break;
    case SCE_GXM_FRAGMENT_PROGRAM_ENABLED: name = "SCE_GXM_FRAGMENT_PROGRAM_ENABLED"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmLineFillLastPixelMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmLineFillLastPixelMode>(const MemState &mem, SceGxmLineFillLastPixelMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_LINE_FILL_LAST_PIXEL_DISABLED: name = "SCE_GXM_LINE_FILL_LAST_PIXEL_DISABLED"; break;
    case SCE_GXM_LINE_FILL_LAST_PIXEL_ENABLED: name = "SCE_GXM_LINE_FILL_LAST_PIXEL_ENABLED"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmPolygonMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmPolygonMode>(const MemState &mem, SceGxmPolygonMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_POLYGON_MODE_TRIANGLE_FILL: name = "SCE_GXM_POLYGON_MODE_TRIANGLE_FILL"; break;
    case SCE_GXM_POLYGON_MODE_LINE: name = "SCE_GXM_POLYGON_MODE_LINE"; break;
    case SCE_GXM_POLYGON_MODE_POINT_10UV: name = "SCE_GXM_POLYGON_MODE_POINT_10UV"; break;
    case SCE_GXM_POLYGON_MODE_POINT: name = "SCE_GXM_POLYGON_MODE_POINT"; break;
    case SCE_GXM_POLYGON_MODE_POINT_01UV: name = "SCE_GXM_POLYGON_MODE_POINT_01UV"; break;
    case SCE_GXM_POLYGON_MODE_TRIANGLE_LINE: name = "SCE_GXM_POLYGON_MODE_TRIANGLE_LINE"; break;
    case SCE_GXM_POLYGON_MODE_TRIANGLE_POINT: name = "SCE_GXM_POLYGON_MODE_TRIANGLE_POINT"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmStencilFunc value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmStencilFunc>(const MemState &mem, SceGxmStencilFunc type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_STENCIL_FUNC_NEVER: name = "SCE_GXM_STENCIL_FUNC_NEVER"; break;
    case SCE_GXM_STENCIL_FUNC_LESS: name = "SCE_GXM_STENCIL_FUNC_LESS"; break;
    case SCE_GXM_STENCIL_FUNC_EQUAL: name = "SCE_GXM_STENCIL_FUNC_EQUAL"; break;
    case SCE_GXM_STENCIL_FUNC_LESS_EQUAL: name = "SCE_GXM_STENCIL_FUNC_LESS_EQUAL"; break;
    case SCE_GXM_STENCIL_FUNC_GREATER: name = "SCE_GXM_STENCIL_FUNC_GREATER"; break;
    case SCE_GXM_STENCIL_FUNC_NOT_EQUAL: name = "SCE_GXM_STENCIL_FUNC_NOT_EQUAL"; break;
    case SCE_GXM_STENCIL_FUNC_GREATER_EQUAL: name = "SCE_GXM_STENCIL_FUNC_GREATER_EQUAL"; break;
    case SCE_GXM_STENCIL_FUNC_ALWAYS: name = "SCE_GXM_STENCIL_FUNC_ALWAYS"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmStencilOp value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmStencilOp>(const MemState &mem, SceGxmStencilOp type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_STENCIL_OP_KEEP: name = "SCE_GXM_STENCIL_OP_KEEP"; break;
    case SCE_GXM_STENCIL_OP_ZERO: name = "SCE_GXM_STENCIL_OP_ZERO"; break;
    case SCE_GXM_STENCIL_OP_REPLACE: name = "SCE_GXM_STENCIL_OP_REPLACE"; break;
    case SCE_GXM_STENCIL_OP_INCR: name = "SCE_GXM_STENCIL_OP_INCR"; break;
    case SCE_GXM_STENCIL_OP_DECR: name = "SCE_GXM_STENCIL_OP_DECR"; break;
    case SCE_GXM_STENCIL_OP_INVERT: name = "SCE_GXM_STENCIL_OP_INVERT"; break;
    case SCE_GXM_STENCIL_OP_INCR_WRAP: name = "SCE_GXM_STENCIL_OP_INCR_WRAP"; break;
    case SCE_GXM_STENCIL_OP_DECR_WRAP: name = "SCE_GXM_STENCIL_OP_DECR_WRAP"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmVisibilityTestMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmVisibilityTestMode>(const MemState &mem, SceGxmVisibilityTestMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_VISIBILITY_TEST_DISABLED: name = "SCE_GXM_VISIBILITY_TEST_DISABLED"; break;
    case SCE_GXM_VISIBILITY_TEST_ENABLED: name = "SCE_GXM_VISIBILITY_TEST_ENABLED"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmVisibilityTestOp value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmVisibilityTestOp>(const MemState &mem, SceGxmVisibilityTestOp type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_VISIBILITY_TEST_OP_INCREMENT: name = "SCE_GXM_VISIBILITY_TEST_OP_INCREMENT"; break;
    case SCE_GXM_VISIBILITY_TEST_OP_SET: name = "SCE_GXM_VISIBILITY_TEST_OP_SET"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmCullMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmCullMode>(const MemState &mem, SceGxmCullMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_CULL_NONE: name = "SCE_GXM_CULL_NONE"; break;
    case SCE_GXM_CULL_CW: name = "SCE_GXM_CULL_CW"; break;
    case SCE_GXM_CULL_CCW: name = "SCE_GXM_CULL_CCW"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmRegionClipMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmRegionClipMode>(const MemState &mem, SceGxmRegionClipMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_REGION_CLIP_NONE: name = "SCE_GXM_REGION_CLIP_NONE"; break;
    case SCE_GXM_REGION_CLIP_ALL: name = "SCE_GXM_REGION_CLIP_ALL"; break;
    case SCE_GXM_REGION_CLIP_OUTSIDE: name = "SCE_GXM_REGION_CLIP_OUTSIDE"; break;
    case SCE_GXM_REGION_CLIP_INSIDE: name = "SCE_GXM_REGION_CLIP_INSIDE"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmTwoSidedMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmTwoSidedMode>(const MemState &mem, SceGxmTwoSidedMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_TWO_SIDED_DISABLED: name = "SCE_GXM_TWO_SIDED_DISABLED"; break;
    case SCE_GXM_TWO_SIDED_ENABLED: name = "SCE_GXM_TWO_SIDED_ENABLED"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmViewportMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmViewportMode>(const MemState &mem, SceGxmViewportMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_VIEWPORT_DISABLED: name = "SCE_GXM_VIEWPORT_DISABLED"; break;
    case SCE_GXM_VIEWPORT_ENABLED: name = "SCE_GXM_VIEWPORT_ENABLED"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmOutputRegisterFormat value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmOutputRegisterFormat>(const MemState &mem, SceGxmOutputRegisterFormat type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_OUTPUT_REGISTER_FORMAT_DECLARED: name = "SCE_GXM_OUTPUT_REGISTER_FORMAT_DECLARED"; break;
    case SCE_GXM_OUTPUT_REGISTER_FORMAT_UCHAR4: name = "SCE_GXM_OUTPUT_REGISTER_FORMAT_UCHAR4"; break;
    case SCE_GXM_OUTPUT_REGISTER_FORMAT_CHAR4: name = "SCE_GXM_OUTPUT_REGISTER_FORMAT_CHAR4"; break;
    case SCE_GXM_OUTPUT_REGISTER_FORMAT_USHORT2: name = "SCE_GXM_OUTPUT_REGISTER_FORMAT_USHORT2"; break;
    case SCE_GXM_OUTPUT_REGISTER_FORMAT_SHORT2: name = "SCE_GXM_OUTPUT_REGISTER_FORMAT_SHORT2"; break;
    case SCE_GXM_OUTPUT_REGISTER_FORMAT_HALF4: name = "SCE_GXM_OUTPUT_REGISTER_FORMAT_HALF4"; break;
    case SCE_GXM_OUTPUT_REGISTER_FORMAT_HALF2: name = "SCE_GXM_OUTPUT_REGISTER_FORMAT_HALF2"; break;
    case SCE_GXM_OUTPUT_REGISTER_FORMAT_FLOAT2: name = "SCE_GXM_OUTPUT_REGISTER_FORMAT_FLOAT2"; break;
    case SCE_GXM_OUTPUT_REGISTER_FORMAT_FLOAT: name = "SCE_GXM_OUTPUT_REGISTER_FORMAT_FLOAT"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmMultisampleMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmMultisampleMode>(const MemState &mem, SceGxmMultisampleMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_MULTISAMPLE_NONE: name = "SCE_GXM_MULTISAMPLE_NONE"; break;
    case SCE_GXM_MULTISAMPLE_2X: name = "SCE_GXM_MULTISAMPLE_2X"; break;
    case SCE_GXM_MULTISAMPLE_4X: name = "SCE_GXM_MULTISAMPLE_4X"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmTextureGammaMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmTextureGammaMode>(const MemState &mem, SceGxmTextureGammaMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_TEXTURE_GAMMA_NONE: name = "SCE_GXM_TEXTURE_GAMMA_NONE"; break;
    case SCE_GXM_TEXTURE_GAMMA_GR: name = "SCE_GXM_TEXTURE_GAMMA_GR"; break;
    case SCE_GXM_TEXTURE_GAMMA_BGR: name = "SCE_GXM_TEXTURE_GAMMA_BGR"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmTextureFilter value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmTextureFilter>(const MemState &mem, SceGxmTextureFilter type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_TEXTURE_FILTER_POINT: name = "SCE_GXM_TEXTURE_FILTER_POINT"; break;
    case SCE_GXM_TEXTURE_FILTER_LINEAR: name = "SCE_GXM_TEXTURE_FILTER_LINEAR"; break;
    case SCE_GXM_TEXTURE_FILTER_MIPMAP_LINEAR: name = "SCE_GXM_TEXTURE_FILTER_MIPMAP_LINEAR"; break;
    case SCE_GXM_TEXTURE_FILTER_MIPMAP_POINT: name = "SCE_GXM_TEXTURE_FILTER_MIPMAP_POINT"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmTextureMipFilter value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmTextureMipFilter>(const MemState &mem, SceGxmTextureMipFilter type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_TEXTURE_MIP_FILTER_DISABLED: name = "SCE_GXM_TEXTURE_MIP_FILTER_DISABLED"; break;
    case SCE_GXM_TEXTURE_MIP_FILTER_ENABLED: name = "SCE_GXM_TEXTURE_MIP_FILTER_ENABLED"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmTextureNormalizeMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmTextureNormalizeMode>(const MemState &mem, SceGxmTextureNormalizeMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_TEXTURE_NORMALIZE_DISABLED: name = "SCE_GXM_TEXTURE_NORMALIZE_DISABLED"; break;
    case SCE_GXM_TEXTURE_NORMALIZE_ENABLED: name = "SCE_GXM_TEXTURE_NORMALIZE_ENABLED"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmTextureAddrMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmTextureAddrMode>(const MemState &mem, SceGxmTextureAddrMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_TEXTURE_ADDR_REPEAT: name = "SCE_GXM_TEXTURE_ADDR_REPEAT"; break;
    case SCE_GXM_TEXTURE_ADDR_MIRROR: name = "SCE_GXM_TEXTURE_ADDR_MIRROR"; break;
    case SCE_GXM_TEXTURE_ADDR_CLAMP: name = "SCE_GXM_TEXTURE_ADDR_CLAMP"; break;
    case SCE_GXM_TEXTURE_ADDR_MIRROR_CLAMP: name = "SCE_GXM_TEXTURE_ADDR_MIRROR_CLAMP"; break;
    case SCE_GXM_TEXTURE_ADDR_REPEAT_IGNORE_BORDER: name = "SCE_GXM_TEXTURE_ADDR_REPEAT_IGNORE_BORDER"; break;
    case SCE_GXM_TEXTURE_ADDR_CLAMP_FULL_BORDER: name = "SCE_GXM_TEXTURE_ADDR_CLAMP_FULL_BORDER"; break;
    case SCE_GXM_TEXTURE_ADDR_CLAMP_IGNORE_BORDER: name = "SCE_GXM_TEXTURE_ADDR_CLAMP_IGNORE_BORDER"; break;
    case SCE_GXM_TEXTURE_ADDR_CLAMP_HALF_BORDER: name = "SCE_GXM_TEXTURE_ADDR_CLAMP_HALF_BORDER"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmTransferColorKeyMode value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmTransferColorKeyMode>(const MemState &mem, SceGxmTransferColorKeyMode type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_TRANSFER_COLORKEY_NONE: name = "SCE_GXM_TRANSFER_COLORKEY_NONE"; break;
    case SCE_GXM_TRANSFER_COLORKEY_PASS: name = "SCE_GXM_TRANSFER_COLORKEY_PASS"; break;
    case SCE_GXM_TRANSFER_COLORKEY_REJECT: name = "SCE_GXM_TRANSFER_COLORKEY_REJECT"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmTransferFormat value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmTransferFormat>(const MemState &mem, SceGxmTransferFormat type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_TRANSFER_FORMAT_U8_R: name = "SCE_GXM_TRANSFER_FORMAT_U8_R"; break;
    case SCE_GXM_TRANSFER_FORMAT_U4U4U4U4_ABGR: name = "SCE_GXM_TRANSFER_FORMAT_U4U4U4U4_ABGR"; break;
    case SCE_GXM_TRANSFER_FORMAT_U1U5U5U5_ABGR: name = "SCE_GXM_TRANSFER_FORMAT_U1U5U5U5_ABGR"; break;
    case SCE_GXM_TRANSFER_FORMAT_U5U6U5_BGR: name = "SCE_GXM_TRANSFER_FORMAT_U5U6U5_BGR"; break;
    case SCE_GXM_TRANSFER_FORMAT_U8U8_GR: name = "SCE_GXM_TRANSFER_FORMAT_U8U8_GR"; break;
    case SCE_GXM_TRANSFER_FORMAT_U8U8U8_BGR: name = "SCE_GXM_TRANSFER_FORMAT_U8U8U8_BGR"; break;
    case SCE_GXM_TRANSFER_FORMAT_U8U8U8U8_ABGR: name = "SCE_GXM_TRANSFER_FORMAT_U8U8U8U8_ABGR"; break;
    case SCE_GXM_TRANSFER_FORMAT_VYUY422: name = "SCE_GXM_TRANSFER_FORMAT_VYUY422"; break;
    case SCE_GXM_TRANSFER_FORMAT_YVYU422: name = "SCE_GXM_TRANSFER_FORMAT_YVYU422"; break;
    case SCE_GXM_TRANSFER_FORMAT_UYVY422: name = "SCE_GXM_TRANSFER_FORMAT_UYVY422"; break;
    case SCE_GXM_TRANSFER_FORMAT_YUYV422: name = "SCE_GXM_TRANSFER_FORMAT_YUYV422"; break;
    case SCE_GXM_TRANSFER_FORMAT_U2U10U10U10_ABGR: name = "SCE_GXM_TRANSFER_FORMAT_U2U10U10U10_ABGR"; break;
    case SCE_GXM_TRANSFER_FORMAT_RAW16: name = "SCE_GXM_TRANSFER_FORMAT_RAW16"; break;
    case SCE_GXM_TRANSFER_FORMAT_RAW32: name = "SCE_GXM_TRANSFER_FORMAT_RAW32"; break;
    case SCE_GXM_TRANSFER_FORMAT_RAW64: name = "SCE_GXM_TRANSFER_FORMAT_RAW64"; break;
    case SCE_GXM_TRANSFER_FORMAT_RAW128: name = "SCE_GXM_TRANSFER_FORMAT_RAW128"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmTransferType value; values with no matching case are rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmTransferType>(const MemState &mem, SceGxmTransferType type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_TRANSFER_LINEAR: name = "SCE_GXM_TRANSFER_LINEAR"; break;
    case SCE_GXM_TRANSFER_TILED: name = "SCE_GXM_TRANSFER_TILED"; break;
    case SCE_GXM_TRANSFER_SWIZZLED: name = "SCE_GXM_TRANSFER_SWIZZLED"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Debug name of a SceGxmTransferFlags value; only the two individual enumerators are named,
// any other value is rendered with std::to_string.
template <>
std::string to_debug_str<SceGxmTransferFlags>(const MemState &mem, SceGxmTransferFlags type) {
    const char *name = nullptr;
    switch (type) {
    case SCE_GXM_TRANSFER_FRAGMENT_SYNC: name = "SCE_GXM_TRANSFER_FRAGMENT_SYNC"; break;
    case SCE_GXM_TRANSFER_VERTEX_SYNC: name = "SCE_GXM_TRANSFER_VERTEX_SYNC"; break;
    }
    return name ? std::string(name) : std::to_string(type);
}

// Worker loop for the GXM display queue.
// For every queued entry it waits on the entry's sync objects, removes the entry from the queue,
// runs the guest display callback on the display queue thread and then reports the sync objects as done.
// The loop ends as soon as display_queue.top() yields an empty entry.
static void display_entry_thread(EmuEnvState &emuenv) {
    auto &queue = emuenv.gxm.display_queue;
    const Address guest_callback = emuenv.gxm.params.displayQueueCallback.address();
    const ThreadStatePtr runner = emuenv.kernel.get_thread(emuenv.gxm.display_queue_thread);
    if (!runner) {
        LOG_CRITICAL("display_thread not found. thid:{}", emuenv.gxm.display_queue_thread);
        return;
    }

    for (;;) {
        auto entry = queue.top();
        if (!entry)
            return;

        SceGxmSyncObject *const previous_sync = entry->old_sync.get(emuenv.mem);
        SceGxmSyncObject *const next_sync = entry->new_sync.get(emuenv.mem);
        // when both buffers share one sync object it must only be waited on / signalled once
        const bool distinct_syncs = previous_sync != next_sync;

        // sceGxmDisplayQueueAddEntry waits for both buffers to complete
        renderer::wishlist(previous_sync, entry->old_sync_timestamp);
        if (distinct_syncs)
            renderer::wishlist(next_sync, entry->new_sync_timestamp);

        // the entry has been waited on, so it can leave the display queue
        queue.pop();

        // tell the display whether the upcoming SceDisplaySetFrameBuf call is expected to do something
        emuenv.display.predicting = entry->frame_predicted;
        emuenv.display.current_sync_object = entry->new_sync.address();

        // run the guest callback with the entry's data
        runner->run_guest_function(guest_callback, entry->data);

        // Notifies the renderer of the completion of the callback for this entry.
        // The last_display of the entry, when pushed into the queue, is guaranteed to be timestamp_ahead + 1 at the time of the call.
        renderer::subject_done(previous_sync, entry->old_sync_timestamp + 1);
        if (distinct_syncs)
            renderer::subject_done(next_sync, entry->new_sync_timestamp + 1);

        free(emuenv.mem, entry->data);
    }
}

// Runs a guest deferred-context memory callback on the given thread while holding global_lock.
//
// The callback is invoked with (userdata, size, pointer to a 4-byte slot); the slot is reserved on the
// thread's stack with stack_alloc, read back into return_size after the call and released again.
//
// Returns the pointer produced by the callback. If the thread cannot be found, an error is logged,
// return_size is left untouched and a null pointer is returned.
static Ptr<void> gxmRunDeferredMemoryCallback(KernelState &kernel, const MemState &mem, std::mutex &global_lock, std::uint32_t &return_size, Ptr<SceGxmDeferredContextCallback> callback, Ptr<void> userdata,
    const std::uint32_t size, const SceUID thread_id) {
    const std::lock_guard<std::mutex> guard(global_lock);

    const ThreadStatePtr thread = kernel.get_thread(thread_id);
    if (!thread) {
        // get_thread can fail (display_entry_thread guards against the same case); do not dereference null
        LOG_ERROR("Thread not found for deferred memory callback. thid:{}", thread_id);
        return Ptr<void>{};
    }

    // guest-visible slot the callback writes the actually provided size into
    const Address final_size_addr = stack_alloc(*thread->cpu, 4);

    Ptr<void> result(thread->run_callback(callback.address(), { userdata.address(), size, final_size_addr }));

    return_size = *Ptr<std::uint32_t>(final_size_addr).get(mem);
    stack_free(*thread->cpu, 4);

    return result;
}

struct SceGxmCommandList;

// Represent the data range in the vita memory that is taken by a command list
// If the command list goes through multiple vdm buffers there will be multiple command ranges
struct CommandListRange {
    Address start;
    Address end;
    SceGxmCommandList *command_list;

    bool operator<(const CommandListRange &other) const {
        // the ranges are all disjoints, so no problem for ordering
        return start < other.start;
    }
};

typedef std::set<CommandListRange>::iterator RangeIterator;

// Host-side representation of a GXM command list.
struct SceGxmCommandList {
    renderer::CommandList *list;

    // the locations on the vita memory that correspond to this command list
    // this part is not copied in the command list given to the game by endCommandList
    std::stack<RangeIterator> memory_ranges;
};

// It seems that on a real vita this is the maximum size: writing more corrupted the stack.
// memory_ranges is excluded because it is not part of what is copied to the game; the subtraction
// must therefore use the member's real type (std::stack<RangeIterator>).
static_assert(sizeof(SceGxmCommandList) - sizeof(std::stack<RangeIterator>) <= 32);

// State of a GXM context (immediate or deferred) together with the allocator that hands out
// renderer::Command storage for it.
//
// Immediate contexts carve commands out of the vdm buffer, used as a ring, and fall back to the host heap.
// Deferred contexts malloc every command and only account 4 bytes per command against the vdm
// memory so that overwritten command lists can be detected and freed.
struct SceGxmContext {
    GxmContextState state;

    std::unique_ptr<renderer::Context> renderer;

    // lock held while the guest vdm memory callback is running
    std::mutex &callback_lock;

    // current position in, and end of, the vdm memory block in use
    Ptr<uint8_t> alloc_space{};
    Ptr<uint8_t> alloc_space_end{};

    // for immediate context only
    // we use the fact that everything is done in an ordered manner
    // (i.e if command a is allocated before b, then it is freed before b)
    // the real positions are these ones modulo command_allocator_size
    size_t command_next_free_pos = 0;
    // this one is atomic as it is read from one thread and written to by another
    std::atomic<size_t> command_last_free_pos{ 0 };
    size_t command_allocator_size = 0;

    bool last_precomputed = false;

    // this is used for deferred contexts
    Ptr<uint8_t> alloc_space_start{};
    std::set<CommandListRange> command_list_ranges;
    SceGxmCommandList *curr_command_list = nullptr;

    // tell if a call to set_texture must be made
    gxp::TextureInfo is_vert_texture_dirty;
    gxp::TextureInfo is_frag_texture_dirty;

    bool was_vert_default_uniform_reserved = false;
    bool was_frag_default_uniform_reserved = false;

    explicit SceGxmContext(std::mutex &callback_lock_)
        : callback_lock(callback_lock_) {
    }

    void reset_recording() {
        last_precomputed = false;
    }

    // Releases every command of the list, the renderer list itself, all memory ranges registered
    // for it in command_list_ranges and finally the SceGxmCommandList object.
    void free_command_list(SceGxmCommandList *command_list) {
        // command list has been overwritten, free the memory
        // everything except the command_list itself was allocated using malloc
        renderer::Command *cmd = command_list->list->first;
        while (cmd != command_list->list->last) {
            renderer::Command *next = cmd->next;
            free(cmd);
            cmd = next;
        }
        free(cmd);
        free(command_list->list);

        // we also need to delete all ranges occupied by this list
        while (!command_list->memory_ranges.empty()) {
            command_list_ranges.erase(command_list->memory_ranges.top());
            command_list->memory_ranges.pop();
        }

        delete command_list;
    }

    // check if there are any command list which have been overwritten, meaning we can free their content
    void deferred_check_for_free(const CommandListRange &new_range) {
        // delete all command list that overlaps
        RangeIterator it = command_list_ranges.upper_bound(new_range);
        if (it != command_list_ranges.begin()) {
            // get the first element before
            --it;
            if (it->end <= new_range.start) {
                ++it;
            }
        }

        // now we can go from left to right and stop when it doesn't overlap anymore;
        while (it != command_list_ranges.end() && it->start < new_range.end) {
            RangeIterator to_delete = it;
            // free_command_list erases the ranges of that list, so step past its adjacent ranges
            // first to keep the iterator valid
            while (it != command_list_ranges.end() && it->command_list == to_delete->command_list)
                ++it;

            free_command_list(to_delete->command_list);
        }
    }

    // insert new memory range used by a command list
    void insert_new_memory_range() {
        const CommandListRange range = {
            alloc_space_start.address(),
            alloc_space.address(),
            curr_command_list
        };
        deferred_check_for_free(range);

        // An empty range (start == end) never overlaps anything, so an entry with the same start
        // address may still be present and the insertion is then refused. In that case the returned
        // iterator refers to the existing entry, which must not be recorded for this command list
        // (it would be erased a second time when this list is freed).
        const auto [it, inserted] = command_list_ranges.emplace(range);
        if (inserted)
            curr_command_list->memory_ranges.push(it);
    }

    // Makes sure there is a vdm memory block to allocate from.
    // Returns false when an immediate context already has its block or when the guest memory
    // callback returns no memory; returns true otherwise. With force set, a new block is fetched
    // even if the current one still has space left.
    bool make_new_alloc_space(KernelState &kern, const MemState &mem, const SceUID thread_id, bool force = false) {
        if (alloc_space && (state.type == SCE_GXM_CONTEXT_TYPE_IMMEDIATE)) {
            return false;
        }

        if (!force && alloc_space && alloc_space < alloc_space_end) {
            // we already have spare space
            return true;
        }

        if (state.active && state.type == SCE_GXM_CONTEXT_TYPE_DEFERRED) {
            // update memory ranges
            insert_new_memory_range();
        }

        std::uint32_t actual_size = 0;

        if (state.vdm_buffer && state.vdm_buffer_size > 0) {
            alloc_space = state.vdm_buffer.cast<uint8_t>();
            actual_size = state.vdm_buffer_size;

            if (state.type == SCE_GXM_CONTEXT_TYPE_IMMEDIATE) {
                command_allocator_size = state.vdm_buffer_size / sizeof(renderer::Command);
                command_next_free_pos = 0;
                command_last_free_pos = command_allocator_size - 1;
            } else {
                // setting the vdm buffer size to 0 means we are using it
                state.vdm_buffer_size = 0;
            }
        } else {
            constexpr uint32_t DEFAULT_SIZE = 1024;

            Ptr<void> space = gxmRunDeferredMemoryCallback(kern, mem, callback_lock, actual_size, state.vdm_memory_callback,
                state.memory_callback_userdata, DEFAULT_SIZE, thread_id);

            if (!space) {
                LOG_ERROR("VDM callback runs out of memory!");
                return false;
            }

            alloc_space = space.cast<uint8_t>();
        }

        alloc_space_start = alloc_space;
        alloc_space_end = alloc_space + actual_size;
        return true;
    }

    // Deferred contexts only: accounts 4 bytes against the vdm memory and returns `size` bytes of
    // malloc'ed host memory. Returns nullptr for other context types, when no vdm space could be
    // obtained, or when malloc fails.
    std::uint8_t *linearly_allocate(KernelState &kern, const MemState &mem, const SceUID thread_id, const std::uint32_t size) {
        if (state.type != SCE_GXM_CONTEXT_TYPE_DEFERRED) {
            return nullptr;
        }

        // allocate 4 bytes in the vdm memory to make it look like the vdm buffer is getting used
        // otherwise we would never know when to free our command lists
        constexpr uint32_t allocated_on_vdm = 4;

        if (alloc_space.address() + allocated_on_vdm > alloc_space_end.address()) {
            if (!make_new_alloc_space(kern, mem, thread_id, true)) {
                return nullptr;
            }
        }

        alloc_space = alloc_space + allocated_on_vdm;

        // the data returned is not part of the vita memory (our commands are too big and do not fit)
        return static_cast<uint8_t *>(malloc(size));
    }

    template <typename T>
    T *linearly_allocate(KernelState &kern, const MemState &mem, const SceUID thread_id) {
        return reinterpret_cast<T *>(linearly_allocate(kern, mem, thread_id, sizeof(T)));
    }

    // Returns a default-constructed command.
    // Immediate contexts take the next free ring slot, or a heap command flagged FLAG_FROM_HOST
    // when no slot is available. Deferred contexts use linearly_allocate and flag the command
    // FLAG_NO_FREE; nullptr is returned if that allocation fails.
    renderer::Command *allocate_new_command(KernelState &kern, const MemState &mem, SceUID current_thread_id) {
        renderer::Command *new_command = nullptr;

        if (state.type == SCE_GXM_CONTEXT_TYPE_IMMEDIATE) {
            if (command_allocator_size > 0 && command_next_free_pos <= command_last_free_pos) {
                size_t offset = command_next_free_pos % command_allocator_size;
                command_next_free_pos++;
                new_command = alloc_space.cast<renderer::Command>().get(mem) + offset;
                new (new_command) renderer::Command;
            } else {
                new_command = new renderer::Command;
                new_command->flags |= renderer::Command::FLAG_FROM_HOST;
            }
        } else {
            new_command = linearly_allocate<renderer::Command>(kern, mem, current_thread_id);
            if (!new_command) {
                // never construct an object at a null address
                return nullptr;
            }

            new (new_command) renderer::Command;
            new_command->flags |= renderer::Command::FLAG_NO_FREE;
        }

        return new_command;
    }

    // Gives back a command obtained from allocate_new_command: heap commands are deleted, ring
    // slots are returned by advancing command_last_free_pos, FLAG_NO_FREE commands are left alone.
    void free_new_command(renderer::Command *cmd) {
        if (!(cmd->flags & renderer::Command::FLAG_NO_FREE)) {
            if (cmd->flags & renderer::Command::FLAG_FROM_HOST) {
                delete cmd;
            } else {
                ++command_last_free_pos;
            }
        }
    }
};

// GXM render target: pairs the renderer-side render target object with the parameters kept for it.
// NOTE(review): field meanings below are taken from their names; confirm against the code that fills them.
struct SceGxmRenderTarget {
    // renderer-side object owned by this render target
    std::unique_ptr<renderer::RenderTarget> renderer;
    std::uint16_t width;
    std::uint16_t height;
    std::uint16_t scenesPerFrame;
    // presumably the UID of the driver memory block given at creation -- TODO confirm
    SceUID driverMemBlock;
};

// Hash type stored in vertex program cache keys (see hash_data, which produces it).
using VertexCacheHash = std::uint32_t;

// Key of the vertex program cache: the registered program plus a hash value.
struct VertexProgramCacheKey {
    SceGxmRegisteredProgram vertex_program;
    VertexCacheHash hash;
};

using VertexProgramCache = std::map<VertexProgramCacheKey, Ptr<SceGxmVertexProgram>>;

// Key of the fragment program cache: the registered program plus the blend info it was created with.
struct FragmentProgramCacheKey {
    SceGxmRegisteredProgram fragment_program;
    SceGxmBlendInfo blend_info;
};

using FragmentProgramCache = std::map<FragmentProgramCacheKey, Ptr<SceGxmFragmentProgram>>;

// Shader patcher state: one cache per program kind plus the parameters stored for the patcher.
struct SceGxmShaderPatcher {
    // vertex programs keyed by registered program and hash
    VertexProgramCache vertex_program_cache;
    // fragment programs keyed by registered program and blend info
    FragmentProgramCache fragment_program_cache;
    SceGxmShaderPatcherParams params;
};

// clang-format off
static const size_t size_mask_gxp = 228;
static const uint8_t mask_gxp[] = {
    0x47, 0x58, 0x50, 0x00, 0x01, 0x05, 0x50, 0x03, 0xE1, 0x00, 0x00, 0x00, 0xF6, 0x94, 0xF3, 0x74, 
	0x73, 0x30, 0xEE, 0xE2, 0x01, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 
	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 
	0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x00, 0x00, 
	0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
	0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, 0xC0, 0x3D, 0x03, 0x00, 
	0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 
	0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 
	0x01, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x44, 0xFA, 0x00, 0x00, 0x00, 0xE0, 
	0x7E, 0x0D, 0x81, 0x40, 0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x10, 0x00, 0x00, 0x00, 
	0x01, 0xE1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x73, 0x6B, 
	0x00, 0x00, 0x00, 0x00, 
};
// clang-format on

// NOTE(review): presumably the fallback size for ring buffers; its users are outside this section.
static constexpr std::uint32_t DEFAULT_RING_SIZE = 4096;

// Hashes `size` bytes starting at `data` with XXH3 (64-bit) and narrows the result to VertexCacheHash.
static VertexCacheHash hash_data(const void *data, size_t size) {
    return static_cast<VertexCacheHash>(XXH3_64bits(data, size));
}

// Orders registered programs by their `program` member, so they can be used inside ordered map keys.
static bool operator<(const SceGxmRegisteredProgram &a, const SceGxmRegisteredProgram &b) {
    return a.program < b.program;
}

// Ordering for vertex program cache keys: first by program; keys with equivalent programs are
// ordered by hash with the operands swapped (i.e. the larger hash sorts first).
static bool operator<(const VertexProgramCacheKey &a, const VertexProgramCacheKey &b) {
    if (a.vertex_program < b.vertex_program) {
        return true;
    }
    const bool same_program = !(b.vertex_program < a.vertex_program);
    return same_program && (b.hash < a.hash);
}

// Orders blend infos by comparing their raw object representations byte by byte.
static bool operator<(const SceGxmBlendInfo &a, const SceGxmBlendInfo &b) {
    const int byte_order = memcmp(&a, &b, sizeof(a));
    return byte_order < 0;
}

// Orders fragment program cache keys: first by `fragment_program`; when neither program orders
// before the other, the blend infos are compared with the operands reversed (b before a).
static bool operator<(const FragmentProgramCacheKey &a, const FragmentProgramCacheKey &b) {
    if (a.fragment_program < b.fragment_program) {
        return true;
    }

    // The blend info only acts as a tie-breaker, so it is consulted solely for equivalent programs.
    return !(b.fragment_program < a.fragment_program) && (b.blend_info < a.blend_info);
}

static int init_texture_base(const char *export_name, SceGxmTexture *texture, Ptr<const void> data, SceGxmTextureFormat tex_format, uint32_t width, uint32_t height, uint32_t mipCount,
    const SceGxmTextureType &texture_type) {
    if (width > 4096 || height > 4096 || mipCount > 13) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }
    memset(texture, 0, sizeof(SceGxmTexture));

    texture->mip_count = std::min<std::uint32_t>(15, mipCount - 1);
    texture->format0 = (tex_format & 0x80000000) >> 31;
    texture->lod_bias = 31;
    texture->gamma_mode = 0;

    if ((texture_type == SCE_GXM_TEXTURE_SWIZZLED) || (texture_type == SCE_GXM_TEXTURE_CUBE)) {
        texture->height_base2 = std::bit_width(height) - 1;
        texture->width_base2 = std::bit_width(width) - 1;
    } else {
        texture->height = height - 1;
        texture->width = width - 1;
    }

    texture->uaddr_mode = texture->vaddr_mode = SCE_GXM_TEXTURE_ADDR_CLAMP;
    texture->base_format = (tex_format & 0x1F000000) >> 24;
    texture->type = texture_type >> 29;
    texture->data_addr = data.address() >> 2;
    texture->swizzle_format = (tex_format & 0x7000) >> 12;
    texture->normalize_mode = 1;
    texture->min_filter = SCE_GXM_TEXTURE_FILTER_POINT;
    texture->mag_filter = SCE_GXM_TEXTURE_FILTER_POINT;
    texture->mip_filter = 0;
    texture->lod_min0 = 0;
    texture->lod_min1 = 0;

    return 0;
}

// Not implemented: only records the call for tracing and returns the result of UNIMPLEMENTED().
EXPORT(int, _sceGxmBeginScene) {
    TRACY_FUNC(_sceGxmBeginScene);
    return UNIMPLEMENTED();
}

// Not implemented: only records the call for tracing and returns the result of UNIMPLEMENTED().
EXPORT(int, sceGxmAddRazorGpuCaptureBuffer) {
    TRACY_FUNC(sceGxmAddRazorGpuCaptureBuffer);
    return UNIMPLEMENTED();
}

// Pushes the viewport stored in the context to the renderer: the stored offset/scale when the
// viewport is enabled, the renderer's "flat" viewport otherwise.
static void update_viewport(renderer::State &state, SceGxmContext *context) {
    auto &viewport = context->state.viewport;

    if (viewport.enable != SCE_GXM_VIEWPORT_ENABLED) {
        renderer::set_viewport_flat(state, context->renderer.get());
        return;
    }

    auto &offset = viewport.offset;
    auto &scale = viewport.scale;
    renderer::set_viewport_real(state, context->renderer.get(), offset.x, offset.y, offset.z,
        scale.x, scale.y, scale.z);
}

EXPORT(void, sceGxmSetDefaultRegionClipAndViewport, SceGxmContext *context, uint32_t xMax, uint32_t yMax) {
    TRACY_FUNC(sceGxmSetDefaultRegionClipAndViewport, context, xMax, yMax);
    const std::uint32_t xMin = 0;
    const std::uint32_t yMin = 0;

    context->state.viewport.offset.x = 0.5f * (1.0f + xMin + xMax);
    context->state.viewport.offset.y = 0.5f * (1.0f + yMin + yMax);
    context->state.viewport.offset.z = 0.5f;
    context->state.viewport.scale.x = 0.5f * (1.0f + xMax - xMin);
    context->state.viewport.scale.y = -0.5f * (1.0f + yMax - yMin);
    context->state.viewport.scale.z = 0.5f;

    context->state.region_clip_min.x = xMin;
    context->state.region_clip_max.x = xMax;
    context->state.region_clip_min.y = yMin;
    context->state.region_clip_max.y = yMax;
    context->state.region_clip_mode = SCE_GXM_REGION_CLIP_OUTSIDE;

    if (context->alloc_space) {
        // Set default region clip and viewport
        renderer::set_region_clip(*emuenv.renderer, context->renderer.get(), SCE_GXM_REGION_CLIP_OUTSIDE,
            xMin, xMax, yMin, yMax);

        update_viewport(*emuenv.renderer, context);
    }
}

// Re-submits the state cached in `context->state` to the renderer.
//
// state                  - renderer state used for every submitted command.
// context                - GXM context whose cached state is replayed.
// sync_viewport_and_clip - when true, the region clip rectangle and the viewport are replayed too.
//
// Besides replaying, this resets the visibility state (only when the renderer supports memory
// mapping) and marks all textures of the currently bound programs as dirty.
static void gxmContextStateRestore(renderer::State &state, SceGxmContext *context, const bool sync_viewport_and_clip) {
    auto *const render_ctx = context->renderer.get();
    auto &cached = context->state;

    if (sync_viewport_and_clip) {
        // The clip mode is always submitted as SCE_GXM_REGION_CLIP_OUTSIDE here, not the cached mode.
        renderer::set_region_clip(state, render_ctx, SCE_GXM_REGION_CLIP_OUTSIDE,
            cached.region_clip_min.x, cached.region_clip_max.x, cached.region_clip_min.y,
            cached.region_clip_max.y);

        update_viewport(state, context);
    }

    // Rasterizer and depth state; the boolean argument selects the front (true) or back (false) face.
    renderer::set_cull_mode(state, render_ctx, cached.cull_mode);
    renderer::set_depth_bias(state, render_ctx, true, cached.front_depth_bias_factor, cached.front_depth_bias_units);
    renderer::set_depth_bias(state, render_ctx, false, cached.back_depth_bias_factor, cached.back_depth_bias_units);
    renderer::set_depth_func(state, render_ctx, true, cached.front_depth_func);
    renderer::set_depth_func(state, render_ctx, false, cached.back_depth_func);
    renderer::set_depth_write_enable_mode(state, render_ctx, true, cached.front_depth_write_enable);
    renderer::set_depth_write_enable_mode(state, render_ctx, false, cached.back_depth_write_enable);
    renderer::set_point_line_width(state, render_ctx, true, cached.front_point_line_width);
    renderer::set_point_line_width(state, render_ctx, false, cached.back_point_line_width);
    renderer::set_polygon_mode(state, render_ctx, true, cached.front_polygon_mode);
    renderer::set_polygon_mode(state, render_ctx, false, cached.back_polygon_mode);
    renderer::set_two_sided_enable(state, render_ctx, cached.two_sided);

    // Stencil state for both faces.
    auto &front = cached.front_stencil;
    auto &back = cached.back_stencil;
    renderer::set_stencil_func(state, render_ctx, true, front.func, front.stencil_fail,
        front.depth_fail, front.depth_pass, front.compare_mask,
        front.write_mask);
    renderer::set_stencil_func(state, render_ctx, false, back.func, back.stencil_fail,
        back.depth_fail, back.depth_pass, back.compare_mask,
        back.write_mask);
    renderer::set_stencil_ref(state, render_ctx, true, front.ref);
    renderer::set_stencil_ref(state, render_ctx, false, back.ref);

    if (state.features.support_memory_mapping) {
        // Visibility testing is reset rather than restored.
        cached.visibility_enable = false;
        cached.visibility_index = 0;
        cached.visibility_is_increment = true;
        renderer::set_visibility_index(state, render_ctx, false, 0, true);
    }

    if (cached.vertex_program) {
        renderer::set_program(state, render_ctx, cached.vertex_program, false);

        context->is_vert_texture_dirty.set();
    }

    // The uniform buffers and vertex streams will be uploaded later; for now only the textures need to be resynced
    if (cached.fragment_program) {
        renderer::set_program(state, render_ctx, cached.fragment_program, true);

        context->is_frag_texture_dirty.set();
    }
}

EXPORT(int, sceGxmBeginCommandList, SceGxmContext *deferredContext) {
    TRACY_FUNC(sceGxmBeginCommandList, deferredContext);
    if (!deferredContext) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (deferredContext->state.type != SCE_GXM_CONTEXT_TYPE_DEFERRED) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    if (deferredContext->state.active) {
        return RET_ERROR(SCE_GXM_ERROR_WITHIN_COMMAND_LIST);
    }

    deferredContext->state.fragment_ring_buffer_used = 0;
    deferredContext->state.vertex_ring_buffer_used = 0;

    deferredContext->curr_command_list = new SceGxmCommandList();

    if (!deferredContext->make_new_alloc_space(emuenv.kernel, emuenv.mem, thread_id)) {
        return RET_ERROR(SCE_GXM_ERROR_RESERVE_FAILED);
    }

    // in case the same vdm buffer was used for two consecutive command lists
    deferredContext->alloc_space_start = deferredContext->alloc_space;

    if (!deferredContext->state.vertex_ring_buffer) {
        deferredContext->state.vertex_ring_buffer = gxmRunDeferredMemoryCallback(emuenv.kernel, emuenv.mem, emuenv.gxm.callback_lock, deferredContext->state.vertex_ring_buffer_size,
            deferredContext->state.vertex_memory_callback, deferredContext->state.memory_callback_userdata, DEFAULT_RING_SIZE, thread_id);

        if (!deferredContext->state.vertex_ring_buffer) {
            return RET_ERROR(SCE_GXM_ERROR_RESERVE_FAILED);
        }
    }

    if (!deferredContext->state.fragment_ring_buffer) {
        deferredContext->state.fragment_ring_buffer = gxmRunDeferredMemoryCallback(emuenv.kernel, emuenv.mem, emuenv.gxm.callback_lock, deferredContext->state.fragment_ring_buffer_size,
            deferredContext->state.fragment_memory_callback, deferredContext->state.memory_callback_userdata, DEFAULT_RING_SIZE, thread_id);

        if (!deferredContext->state.fragment_ring_buffer) {
            return RET_ERROR(SCE_GXM_ERROR_RESERVE_FAILED);
        }
    }

    // Set command allocate functions
    KernelState *kernel = &emuenv.kernel;
    MemState *mem = &emuenv.mem;

    deferredContext->renderer->alloc_func = [deferredContext, kernel, mem, thread_id]() {
        return deferredContext->allocate_new_command(*kernel, *mem, thread_id);
    };

    deferredContext->renderer->free_func = [](renderer::Command *cmd) {
        // do not delete here, commands will be deleted when they are overwritten
    };

    // Begin the command list by white washing previous command list, and restoring deferred state
    renderer::reset_command_list(deferredContext->renderer->command_list);
    gxmContextStateRestore(*emuenv.renderer, deferredContext, false);

    deferredContext->state.active = true;

    return 0;
}

// Begins a scene on an immediate context.
//
// Validation, in order: null context -> SCE_GXM_ERROR_INVALID_POINTER; any flag bit outside the
// low four -> SCE_GXM_ERROR_INVALID_VALUE; null renderTarget or a non-null vertexSyncObject ->
// SCE_GXM_ERROR_INVALID_POINTER; non-immediate context -> SCE_GXM_ERROR_INVALID_VALUE; context
// already active -> SCE_GXM_ERROR_WITHIN_SCENE. Returns 0 on success.
//
// On success the context is marked active, all textures are flagged dirty, the render target and
// surface copies are bound through renderer::set_context, and the default region clip/viewport
// is set from validRegion (or from the render target size when validRegion is null).
EXPORT(int, sceGxmBeginScene, SceGxmContext *context, uint32_t flags, const SceGxmRenderTarget *renderTarget, const SceGxmValidRegion *validRegion, SceGxmSyncObject *vertexSyncObject, Ptr<SceGxmSyncObject> fragmentSyncObject, const SceGxmColorSurface *colorSurface, const SceGxmDepthStencilSurface *depthStencil) {
    TRACY_FUNC(sceGxmBeginScene, context, flags, renderTarget, validRegion, vertexSyncObject, fragmentSyncObject, colorSurface, depthStencil);
    if (!context) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    // Only the low four flag bits are accepted.
    if (flags & 0xFFFFFFF0) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    // A vertex sync object is rejected: it must be null.
    if (!renderTarget || (vertexSyncObject != nullptr)) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (context->state.type != SCE_GXM_CONTEXT_TYPE_IMMEDIATE) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    if (context->state.active) {
        return RET_ERROR(SCE_GXM_ERROR_WITHIN_SCENE);
    }

    context->state.fragment_sync_object = fragmentSyncObject;

    if (fragmentSyncObject) {
        SceGxmSyncObject *sync = fragmentSyncObject.get(emuenv.mem);

        // Wait for the display queue to be done.
        // If it's offline render, the sync object already has the display queue subject done, so don't worry.
        renderer::add_command(context->renderer.get(), renderer::CommandOpcode::WaitSyncObject,
            nullptr, fragmentSyncObject, sync->last_display.load());
    }

    // It's legal to set at client.
    context->state.active = true;
    context->last_precomputed = false;

    // set all textures as dirty, in case their content is modified (even though I really don't think it would)
    // or some texture is the current framebuffer
    context->is_vert_texture_dirty.set();
    context->is_frag_texture_dirty.set();

    // Heap copies of the caller's surfaces; either stays null when the matching argument is null.
    // NOTE(review): nothing here frees these copies — presumably renderer::set_context takes ownership; confirm.
    SceGxmColorSurface *color_surface_copy = nullptr;
    SceGxmDepthStencilSurface *depth_stencil_surface_copy = nullptr;

    if (colorSurface) {
        color_surface_copy = new SceGxmColorSurface;
        *color_surface_copy = *colorSurface;
    }

    if (depthStencil) {
        depth_stencil_surface_copy = new SceGxmDepthStencilSurface;
        *depth_stencil_surface_copy = *depthStencil;
    }

    renderer::set_context(*emuenv.renderer, context->renderer.get(), renderTarget->renderer.get(), color_surface_copy,
        depth_stencil_surface_copy);

    // Without a valid region the whole render target is used (maximum coordinate = size - 1).
    const std::uint32_t xmax = (validRegion ? validRegion->xMax : renderTarget->width - 1);
    const std::uint32_t ymax = (validRegion ? validRegion->yMax : renderTarget->height - 1);

    CALL_EXPORT(sceGxmSetDefaultRegionClipAndViewport, context, xmax, ymax);
    return 0;
}

// Extended variant of sceGxmBeginScene taking separate load and store depth/stencil surfaces.
// Stubbed: after running the same argument checks as sceGxmBeginScene it forwards to that
// export with loadDepthStencilSurface; storeDepthStencilSurface is not used.
EXPORT(int, sceGxmBeginSceneEx, SceGxmContext *immediateContext, uint32_t flags, const SceGxmRenderTarget *renderTarget, const SceGxmValidRegion *validRegion, SceGxmSyncObject *vertexSyncObject, Ptr<SceGxmSyncObject> fragmentSyncObject, const SceGxmColorSurface *colorSurface, const SceGxmDepthStencilSurface *loadDepthStencilSurface, const SceGxmDepthStencilSurface *storeDepthStencilSurface) {
    TRACY_FUNC(sceGxmBeginSceneEx, immediateContext, flags, renderTarget, validRegion, vertexSyncObject, fragmentSyncObject, colorSurface, loadDepthStencilSurface, storeDepthStencilSurface);
    // storeDepthStencilSurface
    if (!immediateContext) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    // Only the low four flag bits are accepted.
    if (flags & 0xFFFFFFF0) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    // A vertex sync object is rejected: it must be null.
    if (!renderTarget || (vertexSyncObject != nullptr)) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (immediateContext->state.type != SCE_GXM_CONTEXT_TYPE_IMMEDIATE) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    if (immediateContext->state.active) {
        return RET_ERROR(SCE_GXM_ERROR_WITHIN_SCENE);
    }

    STUBBED("Using sceGxmBeginScene");
    return CALL_EXPORT(sceGxmBeginScene, immediateContext, flags, renderTarget, validRegion, vertexSyncObject, fragmentSyncObject, colorSurface, loadDepthStencilSurface);
}

// Forward declarations of the texture exports that the color surface functions below invoke
// through CALL_EXPORT to keep a surface's background texture in sync.
DECL_EXPORT(int, sceGxmTextureInitLinear, SceGxmTexture *texture, Ptr<const void> data, SceGxmTextureFormat texFormat, uint32_t width, uint32_t height, uint32_t mipCount);
DECL_EXPORT(int, sceGxmTextureInitLinearStrided, SceGxmTexture *texture, Ptr<const void> data, SceGxmTextureFormat texFormat, uint32_t width, uint32_t height, uint32_t byteStride);
DECL_EXPORT(int, sceGxmTextureInitSwizzled, SceGxmTexture *texture, Ptr<const void> data, SceGxmTextureFormat texFormat, uint32_t width, uint32_t height, uint32_t mipCount);
DECL_EXPORT(int, sceGxmTextureInitTiled, SceGxmTexture *texture, Ptr<const void> data, SceGxmTextureFormat texFormat, uint32_t width, uint32_t height, uint32_t mipCount);
DECL_EXPORT(int, sceGxmTextureSetData, SceGxmTexture *texture, Ptr<const void> data);
DECL_EXPORT(int, sceGxmTextureSetFormat, SceGxmTexture *texture, SceGxmTextureFormat texFormat);
DECL_EXPORT(int, sceGxmTextureSetGammaMode, SceGxmTexture *texture, SceGxmTextureGammaMode gammaMode);

// Not implemented: the output pointers xMin/yMin/xMax/yMax are never written.
// `surface` is only checked by an assert.
EXPORT(void, sceGxmColorSurfaceGetClip, const SceGxmColorSurface *surface, uint32_t *xMin, uint32_t *yMin, uint32_t *xMax, uint32_t *yMax) {
    TRACY_FUNC(sceGxmColorSurfaceGetClip, surface, xMin, yMin, xMax, yMax);
    assert(surface);
    UNIMPLEMENTED();
}

// Returns the data pointer stored in the surface. `surface` is only checked by an assert.
EXPORT(Ptr<void>, sceGxmColorSurfaceGetData, const SceGxmColorSurface *surface) {
    TRACY_FUNC(sceGxmColorSurfaceGetData, surface);
    assert(surface);
    return surface->data;
}

// Stubbed: always reports SCE_GXM_COLOR_SURFACE_DITHER_DISABLED, regardless of the surface contents.
// `surface` is only checked by an assert.
EXPORT(SceGxmColorSurfaceDitherMode, sceGxmColorSurfaceGetDitherMode, const SceGxmColorSurface *surface) {
    TRACY_FUNC(sceGxmColorSurfaceGetDitherMode, surface);
    assert(surface);
    STUBBED("SCE_GXM_COLOR_SURFACE_DITHER_DISABLED");
    return SceGxmColorSurfaceDitherMode::SCE_GXM_COLOR_SURFACE_DITHER_DISABLED;
}

// Returns the color format stored in the surface. `surface` is only checked by an assert.
EXPORT(SceGxmColorFormat, sceGxmColorSurfaceGetFormat, const SceGxmColorSurface *surface) {
    TRACY_FUNC(sceGxmColorSurfaceGetFormat, surface);
    assert(surface);
    return surface->colorFormat;
}

// Returns the surface's gamma mode. The surface stores the mode shifted right by 12 bits
// (see sceGxmColorSurfaceSetGammaMode), so it is shifted back before being returned.
// `surface` is only checked by an assert.
EXPORT(SceGxmColorSurfaceGammaMode, sceGxmColorSurfaceGetGammaMode, const SceGxmColorSurface *surface) {
    TRACY_FUNC(sceGxmColorSurfaceGetGammaMode, surface);
    assert(surface);
    return static_cast<SceGxmColorSurfaceGammaMode>(surface->gamma << 12);
}

// Maps the surface's `downscale` flag back to a scale mode: MSAA downscale when set, none otherwise.
// `surface` is only checked by an assert.
EXPORT(SceGxmColorSurfaceScaleMode, sceGxmColorSurfaceGetScaleMode, const SceGxmColorSurface *surface) {
    TRACY_FUNC(sceGxmColorSurfaceGetScaleMode, surface);
    assert(surface);
    if (surface->downscale) {
        return SCE_GXM_COLOR_SURFACE_SCALE_MSAA_DOWNSCALE;
    }
    return SCE_GXM_COLOR_SURFACE_SCALE_NONE;
}

// Returns the stride, in pixels, stored in the surface. `surface` is only checked by an assert.
EXPORT(uint32_t, sceGxmColorSurfaceGetStrideInPixels, const SceGxmColorSurface *surface) {
    TRACY_FUNC(sceGxmColorSurfaceGetStrideInPixels, surface);
    assert(surface);
    return surface->strideInPixels;
}

// Returns the surface type stored in the surface. `surface` is only checked by an assert.
EXPORT(SceGxmColorSurfaceType, sceGxmColorSurfaceGetType, const SceGxmColorSurface *surface) {
    TRACY_FUNC(sceGxmColorSurfaceGetType, surface);
    assert(surface);
    return surface->surfaceType;
}

// Initialises a color surface and its background texture.
//
// Validation, in order:
//   null surface or data                              -> SCE_GXM_ERROR_INVALID_POINTER
//   width/height of 0 or above 4096                   -> SCE_GXM_ERROR_INVALID_VALUE
//   odd strideInPixels                                -> SCE_GXM_ERROR_INVALID_ALIGNMENT
//   strideInPixels < width or data not 4-byte aligned -> SCE_GXM_ERROR_INVALID_VALUE
//   swizzled surface with non power-of-two dimensions -> SCE_GXM_ERROR_INVALID_ALIGNMENT
//
// On success the surface is zeroed, filled from the arguments, and the result of the matching
// sceGxmTextureInit* export (chosen by surfaceType) for the background texture is returned.
EXPORT(int, sceGxmColorSurfaceInit, SceGxmColorSurface *surface, SceGxmColorFormat colorFormat, SceGxmColorSurfaceType surfaceType, SceGxmColorSurfaceScaleMode scaleMode, SceGxmOutputRegisterSize outputRegisterSize, uint32_t width, uint32_t height, uint32_t strideInPixels, Ptr<void> data) {
    TRACY_FUNC(sceGxmColorSurfaceInit, surface, colorFormat, surfaceType, scaleMode, outputRegisterSize, width, height, strideInPixels, data);
    if (!surface || !data)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    if (width == 0 || width > 4096 || height == 0 || height > 4096)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    if (strideInPixels & 1)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_ALIGNMENT);

    if ((strideInPixels < width) || ((data.address() & 3) != 0))
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    // if the surface is swizzled, width and height must be power of 2
    if (surfaceType == SCE_GXM_COLOR_SURFACE_SWIZZLED && ((width & (width - 1)) || (height & (height - 1))))
        return RET_ERROR(SCE_GXM_ERROR_INVALID_ALIGNMENT);

    memset(surface, 0, sizeof(SceGxmColorSurface));
    surface->disabled = 0;
    surface->downscale = scaleMode == SCE_GXM_COLOR_SURFACE_SCALE_MSAA_DOWNSCALE;
    surface->width = width;
    surface->height = height;
    surface->strideInPixels = strideInPixels;
    surface->data = data;
    surface->colorFormat = colorFormat;
    surface->surfaceType = surfaceType;
    surface->outputRegisterSize = outputRegisterSize;

    // Value-initialised so that a failed conversion cannot forward an indeterminate value to the
    // texture init call below (in case the converter leaves its output untouched on failure).
    SceGxmTextureFormat tex_format{};
    if (!gxm::convert_color_format_to_texture_format(colorFormat, tex_format)) {
        LOG_WARN("Unable to convert color surface type 0x{:X} to texture format enum for background texture of color surface!", static_cast<std::uint32_t>(colorFormat));
    }

    // initialize the background texture
    switch (surfaceType) {
    case SCE_GXM_COLOR_SURFACE_SWIZZLED:
        return CALL_EXPORT(sceGxmTextureInitSwizzled, &surface->backgroundTex, surface->data, tex_format, width, height, 1);
    case SCE_GXM_COLOR_SURFACE_TILED:
        return CALL_EXPORT(sceGxmTextureInitTiled, &surface->backgroundTex, surface->data, tex_format, width, height, 1);
    default:
        // linear
        if (align(width, 8) == strideInPixels) {
            // The stride equals the width aligned to 8, so the plain linear initialiser is used.
            return CALL_EXPORT(sceGxmTextureInitLinear, &surface->backgroundTex, surface->data, tex_format, width, height, 1);
        } else {
            // Any other stride is passed explicitly, converted from pixels to bytes.
            uint32_t stride_in_bytes = static_cast<uint32_t>(gxm::bits_per_pixel(gxm::get_base_format(colorFormat)) * strideInPixels / 8);
            return CALL_EXPORT(sceGxmTextureInitLinearStrided, &surface->backgroundTex, surface->data, tex_format, width, height, stride_in_bytes);
        }
    }
}

// Initialises `surface` as a disabled color surface: everything is zeroed, the `disabled` flag is
// set, and the background texture is initialised as a 1x1 linear BGRA texture with a null data
// pointer and a mip count of 0.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null surface, otherwise the result of sceGxmTextureInitLinear.
EXPORT(int, sceGxmColorSurfaceInitDisabled, SceGxmColorSurface *surface) {
    TRACY_FUNC(sceGxmColorSurfaceInitDisabled, surface);
    if (!surface)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    memset(surface, 0, sizeof(SceGxmColorSurface));
    surface->disabled = 1;

    // this matches what is being done on a real PS Vita
    return CALL_EXPORT(sceGxmTextureInitLinear, &surface->backgroundTex, Ptr<void>(), SCE_GXM_TEXTURE_FORMAT_U8U8U8U8_BGRA, 1, 1, 0);
}

// Returns true when the surface's `disabled` flag is clear. `surface` is only checked by an assert.
EXPORT(bool, sceGxmColorSurfaceIsEnabled, const SceGxmColorSurface *surface) {
    TRACY_FUNC(sceGxmColorSurfaceIsEnabled, surface);
    assert(surface);
    return !surface->disabled;
}

// Not implemented: the clip rectangle arguments are ignored and the surface is left unchanged.
// `surface` is only checked by an assert.
EXPORT(void, sceGxmColorSurfaceSetClip, SceGxmColorSurface *surface, uint32_t xMin, uint32_t yMin, uint32_t xMax, uint32_t yMax) {
    TRACY_FUNC(sceGxmColorSurfaceSetClip, surface, xMin, yMin, xMax, yMax);
    assert(surface);
    UNIMPLEMENTED();
}

// Replaces the data pointer of the surface and of its background texture.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null surface, SCE_GXM_ERROR_INVALID_VALUE when the
// address is not 4-byte aligned, otherwise the result of sceGxmTextureSetData.
EXPORT(int, sceGxmColorSurfaceSetData, SceGxmColorSurface *surface, Ptr<void> data) {
    TRACY_FUNC(sceGxmColorSurfaceSetData, surface, data);
    if (!surface) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    const bool misaligned = (data.address() & 3) != 0;
    if (misaligned) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    surface->data = data;
    return CALL_EXPORT(sceGxmTextureSetData, &surface->backgroundTex, data);
}

// Not implemented: `ditherMode` is ignored. Returns SCE_GXM_ERROR_INVALID_POINTER for a null
// surface, otherwise the result of UNIMPLEMENTED().
EXPORT(int, sceGxmColorSurfaceSetDitherMode, SceGxmColorSurface *surface, SceGxmColorSurfaceDitherMode ditherMode) {
    TRACY_FUNC(sceGxmColorSurfaceSetDitherMode, surface, ditherMode);
    if (!surface) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    return UNIMPLEMENTED();
}

// Changes the color format of the surface and applies the corresponding texture format to its
// background texture.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null surface, otherwise the result of
// sceGxmTextureSetFormat. A format that cannot be converted is only reported with a warning.
EXPORT(int, sceGxmColorSurfaceSetFormat, SceGxmColorSurface *surface, SceGxmColorFormat format) {
    TRACY_FUNC(sceGxmColorSurfaceSetFormat, surface, format);
    if (!surface) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    surface->colorFormat = format;

    // Value-initialised so that a failed conversion cannot forward an indeterminate value to the
    // texture export below (in case the converter leaves its output untouched on failure).
    SceGxmTextureFormat tex_format{};
    if (!gxm::convert_color_format_to_texture_format(format, tex_format)) {
        LOG_WARN("Unable to convert color surface type 0x{:X} to texture format enum for background texture of color surface!", static_cast<std::uint32_t>(format));
    }

    return CALL_EXPORT(sceGxmTextureSetFormat, &surface->backgroundTex, tex_format);
}

// Stores the gamma mode in the surface (shifted right by 12 bits) and applies the equivalent
// texture gamma mode to the background texture. Modes other than BGR and GR map to
// SCE_GXM_TEXTURE_GAMMA_NONE.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null surface, otherwise the result of
// sceGxmTextureSetGammaMode.
EXPORT(int, sceGxmColorSurfaceSetGammaMode, SceGxmColorSurface *surface, SceGxmColorSurfaceGammaMode gammaMode) {
    TRACY_FUNC(sceGxmColorSurfaceSetGammaMode, surface, gammaMode);
    if (!surface) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    surface->gamma = static_cast<uint32_t>(gammaMode) >> 12;

    SceGxmTextureGammaMode texture_gamma = SCE_GXM_TEXTURE_GAMMA_NONE;
    if (gammaMode == SCE_GXM_COLOR_SURFACE_GAMMA_BGR) {
        texture_gamma = SCE_GXM_TEXTURE_GAMMA_BGR;
    } else if (gammaMode == SCE_GXM_COLOR_SURFACE_GAMMA_GR) {
        texture_gamma = SCE_GXM_TEXTURE_GAMMA_GR;
    }

    return CALL_EXPORT(sceGxmTextureSetGammaMode, &surface->backgroundTex, texture_gamma);
}

// Not implemented: `scaleMode` is ignored and the surface is left unchanged.
// `surface` is only checked by an assert.
EXPORT(void, sceGxmColorSurfaceSetScaleMode, SceGxmColorSurface *surface, SceGxmColorSurfaceScaleMode scaleMode) {
    TRACY_FUNC(sceGxmColorSurfaceSetScaleMode, surface, scaleMode);
    assert(surface);
    UNIMPLEMENTED();
}

// Creates an immediate context inside the host memory supplied in `params`.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER when params or context is null,
// SCE_GXM_ERROR_INVALID_VALUE when params->hostMemSize is smaller than sizeof(SceGxmContext),
// SCE_GXM_ERROR_DRIVER when the renderer context cannot be created, and 0 on success.
// On success *context points at params->hostMem.
EXPORT(int, sceGxmCreateContext, const SceGxmContextParams *params, Ptr<SceGxmContext> *context) {
    TRACY_FUNC(sceGxmCreateContext, params, context);
    if (!params || !context)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    if (params->hostMemSize < sizeof(SceGxmContext)) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    *context = params->hostMem.cast<SceGxmContext>();

    // The context object is constructed in place in the caller-provided host memory.
    SceGxmContext *const ctx = context->get(emuenv.mem);
    new (ctx) SceGxmContext(emuenv.gxm.callback_lock);

    ctx->state.fragment_ring_buffer = params->fragmentRingBufferMem;
    ctx->state.vertex_ring_buffer = params->vertexRingBufferMem;
    ctx->state.fragment_ring_buffer_size = params->fragmentRingBufferMemSize;
    ctx->state.vertex_ring_buffer_size = params->vertexRingBufferMemSize;

    ctx->state.type = SCE_GXM_CONTEXT_TYPE_IMMEDIATE;

    if (!renderer::create_context(*emuenv.renderer, ctx->renderer)) {
        // NOTE(review): the placement-constructed SceGxmContext is not destroyed on this path — confirm that is intended.
        context->reset();
        return RET_ERROR(SCE_GXM_ERROR_DRIVER);
    }

    // Set VDM buffer space
    ctx->state.vdm_buffer = params->vdmRingBufferMem;
    ctx->state.vdm_buffer_size = params->vdmRingBufferMemSize;

    // NOTE(review): the return value is ignored here, unlike in sceGxmBeginCommandList.
    ctx->make_new_alloc_space(emuenv.kernel, emuenv.mem, thread_id);

    // Set command allocate functions
    // The command buffer will not be reallocated, so this is fine to use this thread ID
    KernelState *kernel = &emuenv.kernel;
    MemState *mem = &emuenv.mem;

    ctx->renderer->alloc_func = [ctx, kernel, mem, thread_id]() {
        return ctx->allocate_new_command(*kernel, *mem, thread_id);
    };

    ctx->renderer->free_func = [ctx](renderer::Command *cmd) {
        return ctx->free_new_command(cmd);
    };

    return 0;
}

// Creates a deferred context inside the host memory supplied in `params`.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER when params or deferredContext is null,
// SCE_GXM_ERROR_INVALID_VALUE when params->hostMemSize is smaller than sizeof(SceGxmContext),
// and 0 on success. On success *deferredContext points at params->hostMem.
EXPORT(int, sceGxmCreateDeferredContext, SceGxmDeferredContextParams *params, Ptr<SceGxmContext> *deferredContext) {
    TRACY_FUNC(sceGxmCreateDeferredContext, params, deferredContext);
    if (!params || !deferredContext)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    if (params->hostMemSize < sizeof(SceGxmContext)) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    // The context object is constructed in place in the caller-provided host memory.
    *deferredContext = params->hostMem.cast<SceGxmContext>();
    SceGxmContext *const ctx = deferredContext->get(emuenv.mem);
    new (ctx) SceGxmContext(emuenv.gxm.callback_lock);

    // Only the memory callbacks are recorded here; no ring or VDM buffer is assigned yet.
    ctx->state.vertex_memory_callback = params->vertexCallback;
    ctx->state.fragment_memory_callback = params->fragmentCallback;
    ctx->state.vdm_memory_callback = params->vdmCallback;
    ctx->state.memory_callback_userdata = params->userData;

    ctx->state.type = SCE_GXM_CONTEXT_TYPE_DEFERRED;

    // Create a generic context. This is only used for storing command list
    ctx->renderer = std::make_unique<renderer::Context>();

    return 0;
}

// Allocates a render target in guest memory and creates its renderer-side counterpart.
//
// Checks, in order: null params -> SCE_GXM_ERROR_INVALID_POINTER; any flag bit covered by the
// mask 0xFFFF00EC -> SCE_GXM_ERROR_INVALID_VALUE; null renderTarget -> SCE_GXM_ERROR_INVALID_POINTER.
// Returns SCE_GXM_ERROR_OUT_OF_MEMORY when the allocation fails, SCE_GXM_ERROR_DRIVER when the
// renderer cannot create the target, and 0 on success.
EXPORT(int, sceGxmCreateRenderTarget, const SceGxmRenderTargetParams *params, Ptr<SceGxmRenderTarget> *renderTarget) {
    TRACY_FUNC(sceGxmCreateRenderTarget, params, renderTarget);
    if (!params) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (params->flags & 0xFFFF00EC) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    if (!renderTarget) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    *renderTarget = alloc<SceGxmRenderTarget>(emuenv.mem, __FUNCTION__);
    if (!*renderTarget) {
        return RET_ERROR(SCE_GXM_ERROR_OUT_OF_MEMORY);
    }

    SceGxmRenderTarget *const rt = renderTarget->get(emuenv.mem);
    if (!renderer::create_render_target(*emuenv.renderer, rt->renderer, params)) {
        // NOTE(review): *renderTarget still holds the freed address after this call.
        free(emuenv.mem, *renderTarget);
        return RET_ERROR(SCE_GXM_ERROR_DRIVER);
    }

    // Keep a copy of the parameters that other exports read from the render target.
    rt->width = params->width;
    rt->height = params->height;
    rt->scenesPerFrame = params->scenesPerFrame;
    rt->driverMemBlock = params->driverMemBlock;

    return 0;
}

// Returns the background depth stored in the surface. `surface` is dereferenced without a null check.
EXPORT(float, sceGxmDepthStencilSurfaceGetBackgroundDepth, const SceGxmDepthStencilSurface *surface) {
    TRACY_FUNC(sceGxmDepthStencilSurfaceGetBackgroundDepth, surface);
    return surface->background_depth;
}

// Returns the background mask stored in the surface. `surface` is dereferenced without a null check.
EXPORT(bool, sceGxmDepthStencilSurfaceGetBackgroundMask, const SceGxmDepthStencilSurface *surface) {
    TRACY_FUNC(sceGxmDepthStencilSurfaceGetBackgroundMask, surface);
    return surface->mask;
}

// Returns the background stencil value stored in the surface. `surface` is dereferenced without a null check.
EXPORT(uint8_t, sceGxmDepthStencilSurfaceGetBackgroundStencil, const SceGxmDepthStencilSurface *surface) {
    TRACY_FUNC(sceGxmDepthStencilSurfaceGetBackgroundStencil, surface);
    return surface->stencil;
}

// Maps the surface's `force_load` flag to the force-load mode enum.
// `surface` is dereferenced without a null check.
EXPORT(SceGxmDepthStencilForceLoadMode, sceGxmDepthStencilSurfaceGetForceLoadMode, const SceGxmDepthStencilSurface *surface) {
    TRACY_FUNC(sceGxmDepthStencilSurfaceGetForceLoadMode, surface);
    if (surface->force_load) {
        return SCE_GXM_DEPTH_STENCIL_FORCE_LOAD_ENABLED;
    }
    return SCE_GXM_DEPTH_STENCIL_FORCE_LOAD_DISABLED;
}

// Maps the surface's `force_store` flag to the force-store mode enum.
// `surface` is dereferenced without a null check.
EXPORT(SceGxmDepthStencilForceStoreMode, sceGxmDepthStencilSurfaceGetForceStoreMode, const SceGxmDepthStencilSurface *surface) {
    TRACY_FUNC(sceGxmDepthStencilSurfaceGetForceStoreMode, surface);
    if (surface->force_store) {
        return SCE_GXM_DEPTH_STENCIL_FORCE_STORE_ENABLED;
    }
    return SCE_GXM_DEPTH_STENCIL_FORCE_STORE_DISABLED;
}

// Returns the surface's format as reported by get_format(), or SCE_GXM_ERROR_INVALID_POINTER
// for a null surface.
EXPORT(int, sceGxmDepthStencilSurfaceGetFormat, const SceGxmDepthStencilSurface *surface) {
    TRACY_FUNC(sceGxmDepthStencilSurfaceGetFormat, surface);
    if (!surface) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }
    return surface->get_format();
}

// Returns the surface's stride as reported by get_stride(). For a null surface the
// SCE_GXM_ERROR_INVALID_POINTER code is returned through the uint32_t return value.
EXPORT(uint32_t, sceGxmDepthStencilSurfaceGetStrideInSamples, const SceGxmDepthStencilSurface *surface) {
    TRACY_FUNC(sceGxmDepthStencilSurfaceGetStrideInSamples, surface);
    if (!surface)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    return surface->get_stride();
}

// Initialises an enabled depth/stencil surface.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null surface, SCE_GXM_ERROR_INVALID_VALUE when
// strideInSamples is zero or not a multiple of SCE_GXM_TILE_SIZEX, and 0 on success.
// The surface is zeroed first; the background depth defaults to 1.0 and the mask to 1.
EXPORT(int, sceGxmDepthStencilSurfaceInit, SceGxmDepthStencilSurface *surface, SceGxmDepthStencilFormat depthStencilFormat, SceGxmDepthStencilSurfaceType surfaceType, uint32_t strideInSamples, Ptr<void> depthData, Ptr<void> stencilData) {
    TRACY_FUNC(sceGxmDepthStencilSurfaceInit, surface, depthStencilFormat, surfaceType, strideInSamples, depthData, stencilData);
    if (!surface) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    const bool stride_is_tile_multiple = (strideInSamples != 0) && ((strideInSamples % SCE_GXM_TILE_SIZEX) == 0);
    if (!stride_is_tile_multiple) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    // Start from an all-zero object, then fill in the individual fields.
    memset(surface, 0, sizeof(*surface));
    surface->unk1 = 1;
    surface->unk2 = 1;
    surface->set_stride(strideInSamples);
    surface->set_type(surfaceType);
    surface->set_format(depthStencilFormat);
    surface->depth_data = depthData;
    surface->stencil_data = stencilData;
    surface->background_depth = 1.0f;
    surface->mask = 1;

    return 0;
}

// Initialises `surface` as a disabled depth/stencil surface: everything is zeroed, then the
// background depth is set to 1.0 and both `mask` and `unk2` to 1. Unlike sceGxmDepthStencilSurfaceInit,
// `unk1` stays 0.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null surface, 0 otherwise.
EXPORT(int, sceGxmDepthStencilSurfaceInitDisabled, SceGxmDepthStencilSurface *surface) {
    TRACY_FUNC(sceGxmDepthStencilSurfaceInitDisabled, surface);
    if (!surface) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    memset(surface, 0, sizeof(SceGxmDepthStencilSurface));
    surface->background_depth = 1.0f;
    surface->mask = 1;
    surface->unk2 = 1;

    return 0;
}

// Returns true when the surface is not disabled (as reported by surface->disabled()).
// A null surface is reported through RET_ERROR and treated as not enabled.
EXPORT(bool, sceGxmDepthStencilSurfaceIsEnabled, const SceGxmDepthStencilSurface *surface) {
    TRACY_FUNC(sceGxmDepthStencilSurfaceIsEnabled, surface);
    if (!surface) {
        // Do not return the error code itself: it is non-zero, so converted to bool it would
        // read as "enabled".
        RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
        return false;
    }

    return !surface->disabled();
}

// Stores `depth` as the surface's background depth. A null surface is only reported through RET_ERROR.
EXPORT(void, sceGxmDepthStencilSurfaceSetBackgroundDepth, SceGxmDepthStencilSurface *surface, float depth) {
    TRACY_FUNC(sceGxmDepthStencilSurfaceSetBackgroundDepth, surface, depth);
    if (surface) {
        surface->background_depth = depth;
        return;
    }

    RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
}

// Stores `mask` (converted to uint32_t) as the surface's background mask. A null surface is only reported through RET_ERROR.
EXPORT(void, sceGxmDepthStencilSurfaceSetBackgroundMask, SceGxmDepthStencilSurface *surface, bool mask) {
    TRACY_FUNC(sceGxmDepthStencilSurfaceSetBackgroundMask, surface, mask);
    if (surface) {
        surface->mask = static_cast<uint32_t>(mask);
        return;
    }

    RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
}

// Stores `stencil` as the surface's background stencil value. A null surface is only reported through RET_ERROR.
EXPORT(void, sceGxmDepthStencilSurfaceSetBackgroundStencil, SceGxmDepthStencilSurface *surface, uint8_t stencil) {
    TRACY_FUNC(sceGxmDepthStencilSurfaceSetBackgroundStencil, surface, stencil);
    if (surface) {
        surface->stencil = stencil;
        return;
    }

    RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
}

// Sets the surface's `force_load` flag from `forceLoad` (any non-zero mode enables it).
// A null surface is only reported through RET_ERROR.
EXPORT(void, sceGxmDepthStencilSurfaceSetForceLoadMode, SceGxmDepthStencilSurface *surface, SceGxmDepthStencilForceLoadMode forceLoad) {
    TRACY_FUNC(sceGxmDepthStencilSurfaceSetForceLoadMode, surface, forceLoad);
    if (surface) {
        surface->force_load = static_cast<bool>(forceLoad);
        return;
    }

    RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
}

// Sets the surface's `force_store` flag from `forceStore` (any non-zero mode enables it).
// A null surface is only reported through RET_ERROR.
EXPORT(void, sceGxmDepthStencilSurfaceSetForceStoreMode, SceGxmDepthStencilSurface *surface, SceGxmDepthStencilForceStoreMode forceStore) {
    TRACY_FUNC(sceGxmDepthStencilSurfaceSetForceStoreMode, surface, forceStore);
    if (surface) {
        surface->force_store = static_cast<bool>(forceStore);
        return;
    }

    RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
}

// Destroys the renderer-side context owned by `context`.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null pointer, 0 otherwise.
// NOTE(review): only the renderer context is destroyed here; the SceGxmContext object itself
// (placement-constructed in sceGxmCreateContext) is not destructed — confirm that is intended.
EXPORT(int, sceGxmDestroyContext, Ptr<SceGxmContext> context) {
    TRACY_FUNC(sceGxmDestroyContext, context);
    if (!context)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    renderer::destroy_context(*emuenv.renderer, context.get(emuenv.mem)->renderer);

    return 0;
}

// Not implemented: nothing is released. Returns SCE_GXM_ERROR_INVALID_POINTER for a null
// context, otherwise the result of UNIMPLEMENTED().
EXPORT(int, sceGxmDestroyDeferredContext, SceGxmContext *deferredContext) {
    TRACY_FUNC(sceGxmDestroyDeferredContext, deferredContext);
    if (!deferredContext) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }
    return UNIMPLEMENTED();
}

EXPORT(int, sceGxmDestroyRenderTarget, Ptr<SceGxmRenderTarget> renderTarget) {
    TRACY_FUNC(sceGxmDestroyRenderTarget, renderTarget);
    MemState &mem = emuenv.mem;

    if (!renderTarget)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    if (!renderTarget.valid(mem))
        return RET_ERROR(SCE_GXM_ERROR_DRIVER);

    renderer::destroy_render_target(*emuenv.renderer, renderTarget.get(mem)->renderer);

    free(mem, renderTarget);

    return 0;
}

// Queues a display callback for a buffer swap from `oldBuffer` to `newBuffer`.
//
// The callback data is copied into a fresh guest allocation of
// emuenv.gxm.params.displayQueueCallbackDataSize bytes, the timestamps of both sync objects are
// advanced, the entry is pushed to the display queue and a NewFrame command is sent to the renderer.
// Returns SCE_GXM_ERROR_INVALID_POINTER when either sync object is null, 0 otherwise.
EXPORT(int, sceGxmDisplayQueueAddEntry, Ptr<SceGxmSyncObject> oldBuffer, Ptr<SceGxmSyncObject> newBuffer, Ptr<const void> callbackData) {
    TRACY_FUNC(sceGxmDisplayQueueAddEntry, oldBuffer, newBuffer, callbackData);
    if (!oldBuffer || !newBuffer)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    // Private copy of the caller's callback data.
    // NOTE(review): neither the allocation result nor callbackData is checked before the memcpy.
    const Address address = alloc(emuenv.mem, emuenv.gxm.params.displayQueueCallbackDataSize, __FUNCTION__);
    const Ptr<void> ptr(address);
    memcpy(ptr.get(emuenv.mem), callbackData.get(emuenv.mem), emuenv.gxm.params.displayQueueCallbackDataSize);

    DisplayFrameInfo *frame = predict_next_image(emuenv, newBuffer.address());

    // Block future rendering by setting values of sync object
    SceGxmSyncObject *oldBufferSync = oldBuffer.get(emuenv.mem);
    SceGxmSyncObject *newBufferSync = newBuffer.get(emuenv.mem);

    // The callback records the timestamps as they are BEFORE the increments below.
    DisplayCallback display_callback{
        .data = address,
        .old_sync = oldBuffer,
        .new_sync = newBuffer,
        .old_sync_timestamp = oldBufferSync->timestamp_ahead,
        .new_sync_timestamp = newBufferSync->timestamp_ahead,
        .frame_predicted = frame != nullptr
    };

    // When both pointers refer to the same sync object it is only advanced once.
    oldBufferSync->last_display = ++oldBufferSync->timestamp_ahead;
    if (oldBufferSync != newBufferSync)
        newBufferSync->last_display = ++newBufferSync->timestamp_ahead;
    emuenv.gxm.last_display_global = emuenv.gxm.global_timestamp.fetch_add(1, std::memory_order_relaxed);

    // function may be blocking here (expected behavior)
    emuenv.gxm.display_queue.push(display_callback);

    // TODO: I do this because the sync function does not have access to the display state, but this is not great
    renderer::send_single_command(*emuenv.renderer, nullptr, renderer::CommandOpcode::NewFrame, false, frame, &emuenv.display);

    if (emuenv.gxm.params.displayQueueMaxPendingCount == 1)
        // double buffering, not handled by the queue configuration
        emuenv.gxm.display_queue.wait_empty();

    return 0;
}

// Waits until the display queue is empty (display_queue.wait_empty()), then returns 0.
EXPORT(int, sceGxmDisplayQueueFinish) {
    TRACY_FUNC(sceGxmDisplayQueueFinish);
    emuenv.gxm.display_queue.wait_empty();

    return 0;
}

// Hands every bound, non-empty uniform buffer of `program` to the renderer.
//
// `sizes` holds per-slot sizes in 4-byte units. A slot whose size equals
// SCE_GXM_MAX_UB_IN_FLOAT_UNIT has its byte count bounded by the mapped region that
// lower_bound() yields for its address and by the start of any other buffer located
// after it. `mem` is not used by this function.
static void gxmSetUniformBuffers(renderer::State &state, GxmState &gxm, SceGxmContext *context, const SceGxmProgram &program, std::span<UniformBuffer> buffers, const UniformBufferSizes &sizes, const MemState &mem) {
    const bool is_vertex_stage = !program.is_fragment();

    for (size_t slot = 0; slot < buffers.size(); ++slot) {
        // Unbound slots are skipped before their size is even looked up
        if (!buffers[slot])
            continue;

        const auto unit_count = sizes.at(slot);
        if (unit_count == 0)
            continue;

        const Address base = buffers[slot].address();
        uint32_t byte_count = unit_count * 4;

        if (unit_count == SCE_GXM_MAX_UB_IN_FLOAT_UNIT) {
            // Bound by the end of the mapped region found for this address
            const auto region = gxm.memory_mapped_regions.lower_bound(base);
            if (region != gxm.memory_mapped_regions.end()) {
                const auto region_end = region->first + region->second.size;
                if (region_end > base)
                    byte_count = std::min<uint32_t>(region_end - base, byte_count);
            }

            // Bound by the nearest buffer that starts after this one
            // (the buffer itself never satisfies the strict comparison)
            for (const UniformBuffer &other : buffers) {
                const Address other_base = other.address();
                if (other_base > base)
                    byte_count = std::min<std::uint32_t>(other_base - base, byte_count);
            }
        }

        renderer::set_uniform_buffer(state, context->renderer.get(), is_vertex_stage, slot, byte_count, buffers[slot]);
    }
}

// Shared implementation behind sceGxmDraw and sceGxmDrawInstanced.
//
// Validates the context, then in order: pushes the vertex/fragment uniform buffers,
// re-sets both programs if the previous draw was a precomputed one, uploads the
// textures that are both used by the programs and flagged dirty, registers the vertex
// streams, issues the draw and finally advances the default-uniform ring buffers.
//
// Returns 0 on success, or:
//  - SCE_GXM_ERROR_INVALID_POINTER          if `context` or `indexData` is null
//  - SCE_GXM_ERROR_NOT_WITHIN_COMMAND_LIST  if a deferred context is not active
//  - SCE_GXM_ERROR_NOT_WITHIN_SCENE         if a non-deferred context is not active
//  - SCE_GXM_ERROR_NULL_PROGRAM             if either program is unset
//
// `export_name` and `thread_id` are not referenced directly in this body
// (presumably consumed by the RET_ERROR macro — verify against its definition).
static int gxmDrawElementGeneral(EmuEnvState &emuenv, const char *export_name, const SceUID thread_id, SceGxmContext *context, SceGxmPrimitiveType primType, SceGxmIndexFormat indexType, Ptr<const void> indexData, uint32_t indexCount, uint32_t instanceCount) {
    if (!context || !indexData)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    if (!context->state.active) {
        if (context->state.type == SCE_GXM_CONTEXT_TYPE_DEFERRED) {
            return RET_ERROR(SCE_GXM_ERROR_NOT_WITHIN_COMMAND_LIST);
        } else {
            return RET_ERROR(SCE_GXM_ERROR_NOT_WITHIN_SCENE);
        }
    }

    if (!context->state.fragment_program || !context->state.vertex_program) {
        return RET_ERROR(SCE_GXM_ERROR_NULL_PROGRAM);
    }

    const SceGxmFragmentProgram &gxm_fragment_program = *context->state.fragment_program.get(emuenv.mem);
    const SceGxmVertexProgram &gxm_vertex_program = *context->state.vertex_program.get(emuenv.mem);

    // Set uniforms
    const SceGxmProgram &vertex_program_gxp = *gxm_vertex_program.program.get(emuenv.mem);
    const SceGxmProgram &fragment_program_gxp = *gxm_fragment_program.program.get(emuenv.mem);

    const void *indices_ptr = indexData.get(emuenv.mem);

    gxmSetUniformBuffers(*emuenv.renderer, emuenv.gxm, context, vertex_program_gxp, context->state.vertex_uniform_buffers, gxm_vertex_program.renderer_data->uniform_buffer_sizes,
        emuenv.mem);
    gxmSetUniformBuffers(*emuenv.renderer, emuenv.gxm, context, fragment_program_gxp, context->state.fragment_uniform_buffers, gxm_fragment_program.renderer_data->uniform_buffer_sizes,
        emuenv.mem);

    if (context->last_precomputed) {
        // Need to re-set the data

        // The last argument selects the fragment (true) or vertex (false) stage
        renderer::set_program(*emuenv.renderer, context->renderer.get(), context->state.vertex_program, false);
        renderer::set_program(*emuenv.renderer, context->renderer.get(), context->state.fragment_program, true);

        context->last_precomputed = false;
    }

    // set textures that are dirty
    // Only textures that are both used by the program and dirty are synced; their dirty bit is then cleared
    const gxp::TextureInfo vert_textures_sync = gxm_vertex_program.renderer_data->textures_used & context->is_vert_texture_dirty;
    context->is_vert_texture_dirty &= ~vert_textures_sync;
    const gxp::TextureInfo frag_textures_sync = gxm_fragment_program.renderer_data->textures_used & context->is_frag_texture_dirty;
    context->is_frag_texture_dirty &= ~frag_textures_sync;
    const auto &textures = context->state.textures;
    for (uint16_t texture_index = 0; texture_index < SCE_GXM_MAX_TEXTURE_UNITS; texture_index++) {
        if (vert_textures_sync[texture_index]) {
            // Vertex textures are stored after the fragment ones in the context texture array
            const uint16_t index_position = SCE_GXM_MAX_TEXTURE_UNITS + texture_index;
            renderer::set_texture(*emuenv.renderer, context->renderer.get(), index_position, textures[index_position]);
        }

        if (frag_textures_sync[texture_index])
            renderer::set_texture(*emuenv.renderer, context->renderer.get(), texture_index, textures[texture_index]);
    }

    // Update vertex data. We should store a copy of the data to pass it to the GPU later, since another scene
    // may start to overwrite stuff when this scene is being processed in our queue (in case of OpenGL).
    // NOTE(review): std::max_element returns its end iterator for an empty range, so
    // indexCount == 0 would dereference one past the data — confirm callers never pass 0.
    size_t max_index = 0;
    if (!emuenv.renderer->features.support_memory_mapping) {
        // we don't need to get the vertex buffer size with memory mapping
        if (indexType == SCE_GXM_INDEX_FORMAT_U16) {
            const uint16_t *const data = static_cast<const uint16_t *>(indices_ptr);
            max_index = *std::max_element(&data[0], &data[indexCount]);
        } else {
            const uint32_t *const data = static_cast<const uint32_t *>(indices_ptr);
            max_index = *std::max_element(&data[0], &data[indexCount]);
        }
    }

    // Per-stream byte length needed to cover the highest referenced vertex/instance;
    // left at zero when memory mapping is supported
    size_t max_data_length[SCE_GXM_MAX_VERTEX_STREAMS] = {};
    std::uint32_t stream_used = 0;
    for (const SceGxmVertexAttribute &attribute : gxm_vertex_program.attributes) {
        if (!emuenv.renderer->features.support_memory_mapping) {
            const size_t attribute_size = gxm::attribute_format_size(attribute.format) * attribute.componentCount;
            const SceGxmVertexStream &stream = gxm_vertex_program.streams[attribute.streamIndex];
            const SceGxmIndexSource index_source = static_cast<SceGxmIndexSource>(stream.indexSource);
            // NOTE(review): (instanceCount - 1) wraps around if instanceCount is 0
            const size_t data_passed_length = gxm::is_stream_instancing(index_source) ? ((instanceCount - 1) * stream.stride) : (max_index * stream.stride);
            const size_t data_length = attribute.offset + data_passed_length + attribute_size;
            max_data_length[attribute.streamIndex] = std::max<size_t>(max_data_length[attribute.streamIndex], data_length);
        }

        // Bitmask of the stream indices referenced by at least one attribute
        stream_used |= (1 << attribute.streamIndex);
    }

    // Copy and queue upload
    for (size_t stream_index = 0; stream_index < SCE_GXM_MAX_VERTEX_STREAMS; ++stream_index) {
        // Upload it
        if (stream_used & (1 << static_cast<std::uint16_t>(stream_index))) {
            const size_t data_length = max_data_length[stream_index];
            const Ptr<const void> data = context->state.stream_data[stream_index];

            renderer::set_vertex_stream(*emuenv.renderer, context->renderer.get(), stream_index,
                data_length, data);
        }
    }

    renderer::draw(*emuenv.renderer, context->renderer.get(), primType, indexType, indexData, indexCount, instanceCount);

    // increase the ringbuffer position if a default vertex or fragment buffer was reserved, we know the new position will fit in the ringbuffer
    if (context->was_vert_default_uniform_reserved) {
        const size_t size = (size_t)vertex_program_gxp.default_uniform_buffer_count * 4;
        context->state.vertex_ring_buffer_used += size;
        context->was_vert_default_uniform_reserved = false;
    }

    if (context->was_frag_default_uniform_reserved) {
        const size_t size = (size_t)fragment_program_gxp.default_uniform_buffer_count * 4;
        context->state.fragment_ring_buffer_used += size;
        context->was_frag_default_uniform_reserved = false;
    }

    return 0;
}

// Indexed draw without instancing: forwards to gxmDrawElementGeneral with an instance count of 1.
EXPORT(int, sceGxmDraw, SceGxmContext *context, SceGxmPrimitiveType primType, SceGxmIndexFormat indexType, Ptr<const void> indexData, uint32_t indexCount) {
    TRACY_FUNC(sceGxmDraw, context, primType, indexType, indexData, indexCount);

    constexpr uint32_t single_instance = 1;
    return gxmDrawElementGeneral(emuenv, export_name, thread_id, context, primType, indexType, indexData, indexCount, single_instance);
}

EXPORT(int, sceGxmDrawInstanced, SceGxmContext *context, SceGxmPrimitiveType primType, SceGxmIndexFormat indexType, Ptr<const void> indexData, uint32_t indexCount, uint32_t indexWrap) {
    TRACY_FUNC(sceGxmDrawInstanced, context, primType, indexType, indexData, indexCount, indexWrap);
    if (indexCount % indexWrap != 0) {
        LOG_WARN("Extra vertexes are requested to be drawn (ignored)");
    }

    return gxmDrawElementGeneral(emuenv, export_name, thread_id, context, primType, indexType, indexData, indexWrap, indexCount / indexWrap);
}

// Issues a draw described by a precomputed draw object.
//
// Programs, uniform buffers and textures come from the precomputed vertex/fragment
// state bound on the context when one is set, and from the regular context state
// otherwise. Index data, vertex streams, primitive type and counts come from `draw`.
//
// Returns 0 on success, or:
//  - SCE_GXM_ERROR_INVALID_POINTER          if `context` or `draw` is null
//  - SCE_GXM_ERROR_NOT_WITHIN_COMMAND_LIST  if a deferred context is not active
//  - SCE_GXM_ERROR_NOT_WITHIN_SCENE         if a non-deferred context is not active
//  - SCE_GXM_ERROR_NULL_PROGRAM             if no vertex or fragment program could be resolved
EXPORT(int, sceGxmDrawPrecomputed, SceGxmContext *context, SceGxmPrecomputedDraw *draw) {
    TRACY_FUNC(sceGxmDrawPrecomputed, context, draw);
    if (!context) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (!context->state.active) {
        if (context->state.type == SCE_GXM_CONTEXT_TYPE_DEFERRED) {
            return RET_ERROR(SCE_GXM_ERROR_NOT_WITHIN_COMMAND_LIST);
        } else {
            return RET_ERROR(SCE_GXM_ERROR_NOT_WITHIN_SCENE);
        }
    }

    if (!draw) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    // Either pointer may be null, in which case the regular context state is used below
    SceGxmPrecomputedVertexState *vertex_state = context->state.precomputed_vertex_state.cast<SceGxmPrecomputedVertexState>().get(emuenv.mem);
    SceGxmPrecomputedFragmentState *fragment_state = context->state.precomputed_fragment_state.cast<SceGxmPrecomputedFragmentState>().get(emuenv.mem);

    // not sure if precomputed uses current program... maybe it does?
    // anyway states have to be made on a program to program basis so this should be safe
    const Ptr<const SceGxmFragmentProgram> fragment_program_gptr = fragment_state ? fragment_state->program : context->state.fragment_program;
    const Ptr<const SceGxmVertexProgram> vertex_program_gptr = vertex_state ? vertex_state->program : context->state.vertex_program;

    const SceGxmFragmentProgram *fragment_program = fragment_program_gptr.get(emuenv.mem);
    const SceGxmVertexProgram *vertex_program = vertex_program_gptr.get(emuenv.mem);

    if (!vertex_program || !fragment_program) {
        return RET_ERROR(SCE_GXM_ERROR_NULL_PROGRAM);
    }

    // The last argument selects the fragment (true) or vertex (false) stage
    renderer::set_program(*emuenv.renderer, context->renderer.get(), fragment_program_gptr, true);
    renderer::set_program(*emuenv.renderer, context->renderer.get(), vertex_program_gptr, false);

    // Set uniforms
    const SceGxmProgram &vertex_program_gxp = *vertex_program->program.get(emuenv.mem);
    const SceGxmProgram &fragment_program_gxp = *fragment_program->program.get(emuenv.mem);

    std::span<UniformBuffer> vertex_buffers = vertex_state ? std::span(vertex_state->uniform_buffers.get(emuenv.mem), vertex_state->buffer_count) : context->state.vertex_uniform_buffers;
    std::span<UniformBuffer> fragment_buffers = fragment_state ? std::span(fragment_state->uniform_buffers.get(emuenv.mem), fragment_state->buffer_count) : context->state.fragment_uniform_buffers;

    gxmSetUniformBuffers(*emuenv.renderer, emuenv.gxm, context, vertex_program_gxp, vertex_buffers, vertex_program->renderer_data->uniform_buffer_sizes,
        emuenv.mem);

    gxmSetUniformBuffers(*emuenv.renderer, emuenv.gxm, context, fragment_program_gxp, fragment_buffers, fragment_program->renderer_data->uniform_buffer_sizes,
        emuenv.mem);

    // Update vertex data. We should store a copy of the data to pass it to the GPU later, since another scene
    // may start to overwrite stuff when this scene is being processed in our queue (in case of OpenGL).
    // NOTE(review): std::max_element returns its end iterator for an empty range, so
    // draw->vertex_count == 0 would dereference one past the data — confirm this cannot happen.
    uint32_t max_index = 0;
    if (!emuenv.renderer->features.support_memory_mapping) {
        // we don't need to get the vertex buffer size with memory mapping
        if (draw->index_format == SCE_GXM_INDEX_FORMAT_U16) {
            const uint16_t *const data = draw->index_data.cast<const uint16_t>().get(emuenv.mem);
            max_index = *std::max_element(&data[0], &data[draw->vertex_count]);
        } else {
            const uint32_t *const data = draw->index_data.cast<const uint32_t>().get(emuenv.mem);
            max_index = *std::max_element(&data[0], &data[draw->vertex_count]);
        }
    }

    // set all textures that are used and mark them as dirty
    const gxp::TextureInfo vert_textures_sync = vertex_program->renderer_data->textures_used;
    context->is_vert_texture_dirty |= vert_textures_sync;
    const gxp::TextureInfo frag_textures_sync = fragment_program->renderer_data->textures_used;
    context->is_frag_texture_dirty |= frag_textures_sync;
    // When falling back to the context, vertex textures start SCE_GXM_MAX_TEXTURE_UNITS entries into its texture array
    const SceGxmTexture *frag_textures = fragment_state ? fragment_state->textures.get(emuenv.mem) : context->state.textures.data();
    SceGxmTexture *vert_textures = vertex_state ? vertex_state->textures.get(emuenv.mem) : (context->state.textures.data() + SCE_GXM_MAX_TEXTURE_UNITS);
    for (uint16_t texture_index = 0; texture_index < SCE_GXM_MAX_TEXTURE_UNITS; texture_index++) {
        if (vert_textures_sync[texture_index]) {
            const uint16_t index_position = SCE_GXM_MAX_TEXTURE_UNITS + texture_index;
            renderer::set_texture(*emuenv.renderer, context->renderer.get(), index_position, vert_textures[texture_index]);
        }

        if (frag_textures_sync[texture_index])
            renderer::set_texture(*emuenv.renderer, context->renderer.get(), texture_index, frag_textures[texture_index]);
    }

    // Per-stream byte length needed to cover the highest referenced vertex/instance;
    // left at zero when memory mapping is supported
    size_t max_data_length[SCE_GXM_MAX_VERTEX_STREAMS] = {};
    std::uint32_t stream_used = 0;
    for (const SceGxmVertexAttribute &attribute : vertex_program->attributes) {
        if (!emuenv.renderer->features.support_memory_mapping) {
            const size_t attribute_size = gxm::attribute_format_size(attribute.format) * attribute.componentCount;
            const SceGxmVertexStream &stream = vertex_program->streams[attribute.streamIndex];
            const SceGxmIndexSource index_source = static_cast<SceGxmIndexSource>(stream.indexSource);
            // NOTE(review): (draw->instance_count - 1) wraps around if instance_count is an unsigned 0
            const size_t data_passed_length = gxm::is_stream_instancing(index_source) ? ((draw->instance_count - 1) * stream.stride) : (max_index * stream.stride);
            const size_t data_length = attribute.offset + data_passed_length + attribute_size;
            max_data_length[attribute.streamIndex] = std::max<size_t>(max_data_length[attribute.streamIndex], data_length);
        }

        // Bitmask of the stream indices referenced by at least one attribute
        stream_used |= (1 << attribute.streamIndex);
    }

    auto stream_data = draw->stream_data.get(emuenv.mem);
    // Copy and queue upload
    for (size_t stream_index = 0; stream_index < SCE_GXM_MAX_VERTEX_STREAMS; ++stream_index) {
        // Upload it
        if (stream_used & (1 << static_cast<std::uint16_t>(stream_index))) {
            const size_t data_length = max_data_length[stream_index];
            const Ptr<const void> data = stream_data[stream_index];

            renderer::set_vertex_stream(*emuenv.renderer, context->renderer.get(), stream_index,
                data_length, data);
        }
    }

    renderer::draw(*emuenv.renderer, context->renderer.get(), draw->type, draw->index_format, draw->index_data, draw->vertex_count, draw->instance_count);

    // increase the ringbuffer position if a default vertex or fragment buffer was reserved, we know the new position will fit in the ringbuffer
    // also even in a precomputed draw, this is needed as some parts of the pipeline can be not precomputed
    if (context->was_vert_default_uniform_reserved) {
        const size_t size = (size_t)vertex_program_gxp.default_uniform_buffer_count * 4;
        context->state.vertex_ring_buffer_used += size;
        context->was_vert_default_uniform_reserved = false;
    }

    if (context->was_frag_default_uniform_reserved) {
        const size_t size = (size_t)fragment_program_gxp.default_uniform_buffer_count * 4;
        context->state.fragment_ring_buffer_used += size;
        context->was_frag_default_uniform_reserved = false;
    }

    // Tells the next regular draw that it has to re-set its programs
    context->last_precomputed = true;
    return 0;
}

EXPORT(int, sceGxmEndCommandList, SceGxmContext *deferredContext, SceGxmCommandList *commandList) {
    TRACY_FUNC(sceGxmEndCommandList, deferredContext, commandList);
    if (deferredContext->state.type != SCE_GXM_CONTEXT_TYPE_DEFERRED) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    if (!deferredContext->state.active) {
        return RET_ERROR(SCE_GXM_ERROR_NOT_WITHIN_COMMAND_LIST);
    }

    // only set the first two fields for commandList (its size is assumed to be 32 bytes by the game)
    commandList->list = deferredContext->linearly_allocate<renderer::CommandList>(emuenv.kernel, emuenv.mem,
        thread_id);

    // also update our own command list
    deferredContext->curr_command_list->list = commandList->list;

    *commandList->list = deferredContext->renderer->command_list;

    // insert last memory range
    deferredContext->insert_new_memory_range();
    deferredContext->curr_command_list = nullptr;

    // Reset active state
    deferredContext->state.active = false;
    deferredContext->reset_recording();

    renderer::reset_command_list(deferredContext->renderer->command_list);

    return 0;
}

EXPORT(int, sceGxmEndScene, SceGxmContext *context, SceGxmNotification *vertexNotification, SceGxmNotification *fragmentNotification) {
    TRACY_FUNC(sceGxmEndScene, context, vertexNotification, fragmentNotification);
    const MemState &mem = emuenv.mem;

    if (!context) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (context->state.type != SCE_GXM_CONTEXT_TYPE_IMMEDIATE) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    if (!context->state.active) {
        return RET_ERROR(SCE_GXM_ERROR_WITHIN_SCENE);
    }

    SceGxmNotification empty_notification = { Ptr<uint32_t>(0), 0 };

    // Add command to end the scene
    renderer::sync_surface_data(*emuenv.renderer, context->renderer.get(), vertexNotification ? *vertexNotification : empty_notification, fragmentNotification ? *fragmentNotification : empty_notification);

    if (context->state.fragment_sync_object) {
        SceGxmSyncObject *sync = context->state.fragment_sync_object.get(mem);
        uint32_t cmd_timestamp = ++sync->timestamp_ahead;
        sync->last_operation_global = emuenv.gxm.global_timestamp.fetch_add(1, std::memory_order_relaxed);

        renderer::add_command(context->renderer.get(), renderer::CommandOpcode::SignalSyncObject,
            nullptr, context->state.fragment_sync_object, cmd_timestamp);
    }

    // Submit our command list
    renderer::submit_command_list(*emuenv.renderer, context->renderer.get(), context->renderer->command_list);
    renderer::reset_command_list(context->renderer->command_list);

    context->state.active = false;
    return 0;
}

EXPORT(int, sceGxmExecuteCommandList, SceGxmContext *context, SceGxmCommandList *commandList) {
    TRACY_FUNC(sceGxmExecuteCommandList, context, commandList);
    if (!context) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (context->state.type != SCE_GXM_CONTEXT_TYPE_IMMEDIATE) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    if (!context->state.active) {
        return RET_ERROR(SCE_GXM_ERROR_NOT_WITHIN_SCENE);
    }

    if (!commandList || !commandList->list)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    // Emit a jump to the first command of given command list
    // Since only one immediate context exists per process, direct linking like this should be fine! (I hope)
    renderer::CommandList &imm_cmds = context->renderer->command_list;

    if (imm_cmds.last) {
        imm_cmds.last->next = commandList->list->first;
        imm_cmds.last = commandList->list->last;
    } else {
        imm_cmds.first = commandList->list->first;
        imm_cmds.last = commandList->list->last;
    }

    // Restore back our GXM state
    gxmContextStateRestore(*emuenv.renderer, context, true);

    return 0;
}

EXPORT(int, sceGxmFinish, SceGxmContext *context) {
    TRACY_FUNC(sceGxmFinish, context);
    assert(context);

    if (!context)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_THREAD);

    // Wait on this context's rendering finish code.
    renderer::finish(*emuenv.renderer, context->renderer.get());

    return 0;
}

// Stub: the program is not inspected, every fragment program is reported as opaque.
EXPORT(SceGxmPassType, sceGxmFragmentProgramGetPassType, const SceGxmFragmentProgram *fragmentProgram) {
    TRACY_FUNC(sceGxmFragmentProgramGetPassType, fragmentProgram);
    assert(fragmentProgram);

    constexpr SceGxmPassType reported_type = SceGxmPassType::SCE_GXM_PASS_TYPE_OPAQUE;
    STUBBED("SCE_GXM_PASS_TYPE_OPAQUE");
    return reported_type;
}

// Returns the guest pointer to the shader program stored in `fragmentProgram`.
// The argument is only guarded by an assert.
EXPORT(Ptr<const SceGxmProgram>, sceGxmFragmentProgramGetProgram, const SceGxmFragmentProgram *fragmentProgram) {
    TRACY_FUNC(sceGxmFragmentProgramGetProgram, fragmentProgram);
    assert(fragmentProgram);

    const SceGxmFragmentProgram &fragment = *fragmentProgram;
    return fragment.program;
}

// Not implemented: the result is whatever UNIMPLEMENTED() yields, converted to bool.
EXPORT(bool, sceGxmFragmentProgramIsEnabled, const SceGxmFragmentProgram *fragmentProgram) {
    TRACY_FUNC(sceGxmFragmentProgramIsEnabled, fragmentProgram);
    assert(fragmentProgram);

    return UNIMPLEMENTED();
}

// Writes the type of `context` to `*type`.
// Returns SCE_GXM_ERROR_INVALID_POINTER if either pointer is null, 0 otherwise.
EXPORT(int, sceGxmGetContextType, const SceGxmContext *context, SceGxmContextType *type) {
    TRACY_FUNC(sceGxmGetContextType, context, type);

    const bool arguments_valid = (context != nullptr) && (type != nullptr);
    if (!arguments_valid)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    *type = context->state.type;
    return 0;
}

// Writes the fragment ring buffer pointer of a deferred context to `*mem`.
//
// Returns 0 on success, SCE_GXM_ERROR_INVALID_POINTER for a null argument,
// SCE_GXM_ERROR_INVALID_VALUE for a non-deferred context and
// SCE_GXM_ERROR_WITHIN_COMMAND_LIST while the context is active.
EXPORT(int, sceGxmGetDeferredContextFragmentBuffer, const SceGxmContext *deferredContext, Ptr<void> *mem) {
    TRACY_FUNC(sceGxmGetDeferredContextFragmentBuffer, deferredContext, mem);
    if (deferredContext == nullptr || mem == nullptr)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    const auto &context_state = deferredContext->state;

    if (context_state.type != SCE_GXM_CONTEXT_TYPE_DEFERRED)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    if (context_state.active)
        return RET_ERROR(SCE_GXM_ERROR_WITHIN_COMMAND_LIST);

    *mem = context_state.fragment_ring_buffer;
    return 0;
}

// Writes the VDM buffer pointer of a deferred context to `*mem`.
//
// Returns 0 on success, SCE_GXM_ERROR_INVALID_POINTER for a null argument,
// SCE_GXM_ERROR_INVALID_VALUE for a non-deferred context and
// SCE_GXM_ERROR_WITHIN_COMMAND_LIST while the context is active.
EXPORT(int, sceGxmGetDeferredContextVdmBuffer, const SceGxmContext *deferredContext, Ptr<void> *mem) {
    TRACY_FUNC(sceGxmGetDeferredContextVdmBuffer, deferredContext, mem);
    if (deferredContext == nullptr || mem == nullptr)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    const auto &context_state = deferredContext->state;

    if (context_state.type != SCE_GXM_CONTEXT_TYPE_DEFERRED)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    if (context_state.active)
        return RET_ERROR(SCE_GXM_ERROR_WITHIN_COMMAND_LIST);

    *mem = context_state.vdm_buffer;
    return 0;
}

// Writes the vertex ring buffer pointer of a deferred context to `*mem`.
//
// Returns 0 on success, SCE_GXM_ERROR_INVALID_POINTER for a null argument,
// SCE_GXM_ERROR_INVALID_VALUE for a non-deferred context and
// SCE_GXM_ERROR_WITHIN_COMMAND_LIST while the context is active.
EXPORT(int, sceGxmGetDeferredContextVertexBuffer, const SceGxmContext *deferredContext, Ptr<void> *mem) {
    TRACY_FUNC(sceGxmGetDeferredContextVertexBuffer, deferredContext, mem);
    if (deferredContext == nullptr || mem == nullptr)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    const auto &context_state = deferredContext->state;

    if (context_state.type != SCE_GXM_CONTEXT_TYPE_DEFERRED)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    if (context_state.active)
        return RET_ERROR(SCE_GXM_ERROR_WITHIN_COMMAND_LIST);

    *mem = context_state.vertex_ring_buffer;
    return 0;
}

// Returns the guest pointer stored in the GXM state as the notification region.
EXPORT(Ptr<uint32_t>, sceGxmGetNotificationRegion) {
    TRACY_FUNC(sceGxmGetNotificationRegion);

    const Ptr<uint32_t> region = emuenv.gxm.notification_region;
    return region;
}

// Not implemented beyond argument validation: a null output pointer yields
// SCE_GXM_ERROR_INVALID_POINTER, otherwise the result of UNIMPLEMENTED() is returned
// and `*parameterBufferSize` is left untouched.
EXPORT(int, sceGxmGetParameterBufferThreshold, uint32_t *parameterBufferSize) {
    TRACY_FUNC(sceGxmGetParameterBufferThreshold, parameterBufferSize);
    if (parameterBufferSize == nullptr)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    return UNIMPLEMENTED();
}

EXPORT(uint32_t, sceGxmGetPrecomputedDrawSize, const SceGxmVertexProgram *vertexProgram) {
    TRACY_FUNC(sceGxmGetPrecomputedDrawSize, vertexProgram);
    assert(vertexProgram);

    int max_stream_index = -1;
    for (const SceGxmVertexAttribute &attribute : vertexProgram->attributes) {
        max_stream_index = std::max<int>(attribute.streamIndex, max_stream_index);
    }

    return static_cast<uint32_t>((max_stream_index + 1) * sizeof(StreamData));
}

// Size reported when a precomputed state would otherwise need no memory at all.
static constexpr SceUInt32 SCE_GXM_PRECOMPUTED_OVERHEAD = 8u;

// Bytes needed by a precomputed state: one UniformBuffer per buffer plus one
// SceGxmTexture per texture.
static SceUInt32 get_precomputed_state_size(const uint16_t buffer_count, const uint16_t texture_count) {
    const size_t buffer_bytes = buffer_count * sizeof(UniformBuffer);
    const size_t texture_bytes = texture_count * sizeof(SceGxmTexture);
    const SceUInt32 total_bytes = static_cast<SceUInt32>(buffer_bytes + texture_bytes);

    // Some games expect sceGxmGetPrecomputed*StateSize to be non-zero even when the
    // program uses neither buffers nor textures, so never report zero.
    if (total_bytes == 0)
        return SCE_GXM_PRECOMPUTED_OVERHEAD;

    return total_bytes;
}

// Size of the precomputed state for `fragmentProgram`, derived from the buffer and
// texture counts stored in its renderer data.
EXPORT(SceUInt32, sceGxmGetPrecomputedFragmentStateSize, const SceGxmFragmentProgram *fragmentProgram) {
    TRACY_FUNC(sceGxmGetPrecomputedFragmentStateSize, fragmentProgram);
    assert(fragmentProgram);

    const auto &program_data = *fragmentProgram->renderer_data;
    return get_precomputed_state_size(program_data.buffer_count, program_data.texture_count);
}

// Size of the precomputed state for `vertexProgram`, derived from the buffer and
// texture counts stored in its renderer data.
EXPORT(SceUInt32, sceGxmGetPrecomputedVertexStateSize, const SceGxmVertexProgram *vertexProgram) {
    TRACY_FUNC(sceGxmGetPrecomputedVertexStateSize, vertexProgram);
    assert(vertexProgram);

    const auto &program_data = *vertexProgram->renderer_data;
    return get_precomputed_state_size(program_data.buffer_count, program_data.texture_count);
}

// Stub: reports a fixed 64 KiB host memory size regardless of `params`.
// Returns SCE_GXM_ERROR_INVALID_POINTER if either pointer is null.
EXPORT(int, sceGxmGetRenderTargetMemSize, const SceGxmRenderTargetParams *params, uint32_t *hostMemSize) {
    TRACY_FUNC(sceGxmGetRenderTargetMemSize, params, hostMemSize);
    if (params == nullptr || hostMemSize == nullptr) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    const uint32_t reported_size = static_cast<uint32_t>(KiB(64));
    *hostMemSize = reported_size;

    return STUBBED("64KiB emuenv mem");
}

// Initializes the GXM state from `params`: stores the parameters, sizes the display
// queue, creates the "SceGxmDisplayQueue" guest thread, starts the detached host thread
// running display_entry_thread and allocates a zeroed 1 MiB notification region.
//
// Returns 0 on success, or:
//  - SCE_GXM_ERROR_INVALID_POINTER  if `params` is null
//  - SCE_GXM_ERROR_INVALID_VALUE    if the flags are not one of the accepted values, the
//                                   parameter buffer size has any of its low 18 bits set,
//                                   or pending count * callback data size exceeds 0x200
//  - SCE_GXM_ERROR_DRIVER           if the display queue thread could not be created
EXPORT(int, sceGxmInitialize, const SceGxmInitializeParams *params) {
    TRACY_FUNC(sceGxmInitialize, params);
    if (!params) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (((params->flags != 0x00000000U) && (params->flags != 0x00010000U) && (params->flags != 0x00020000U)) || (params->parameterBufferSize & 0x3FFFF)) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    if ((params->displayQueueMaxPendingCount * params->displayQueueCallbackDataSize) > 0x200) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    emuenv.gxm.params = *params;
    // hack, limit the number of frame rendering at the same time to at most 3
    // also, the last frame won't be in the queue so decrease the count by 1
    // the case where displayQueueMaxPendingCount is 1 handled in sceGxmDisplayQueueAddEntry
    // The count is clamped into [2, 3] *before* the decrement: subtracting from an unsigned
    // 0 would wrap around and produce a practically unbounded queue instead of a size of 1.
    const uint32_t clamped_pending_count = std::max(std::min(params->displayQueueMaxPendingCount, 3U), 2U);
    const uint32_t max_queue_size = clamped_pending_count - 1;
    emuenv.gxm.display_queue.maxPendingCount_ = max_queue_size;

    const ThreadStatePtr main_thread = emuenv.kernel.get_thread(thread_id);
    const ThreadStatePtr display_queue_thread = emuenv.kernel.create_thread(emuenv.mem, "SceGxmDisplayQueue", Ptr<void>(0), SCE_KERNEL_HIGHEST_PRIORITY_USER, SCE_KERNEL_THREAD_CPU_AFFINITY_MASK_DEFAULT, SCE_KERNEL_STACK_SIZE_USER_DEFAULT, nullptr);
    if (!display_queue_thread) {
        return RET_ERROR(SCE_GXM_ERROR_DRIVER);
    }
    emuenv.gxm.display_queue_thread = display_queue_thread->id;

    // Reset the queue in case sceGxmTerminate was called earlier
    emuenv.gxm.display_queue.reset();
    std::thread display_host_thread(display_entry_thread, std::ref(emuenv));
    display_host_thread.detach();
    emuenv.gxm.notification_region = Ptr<uint32_t>(alloc(emuenv.mem, MiB(1), "SceGxmNotificationRegion"));
    memset(emuenv.gxm.notification_region.get(emuenv.mem), 0, MiB(1));
    return 0;
}

// Not implemented: returns whatever UNIMPLEMENTED() yields.
EXPORT(int, sceGxmIsDebugVersion) {
    TRACY_FUNC(sceGxmIsDebugVersion);

    const int result = UNIMPLEMENTED();
    return result;
}

// Stub: no mapping is performed, `*offset` simply receives the guest address of `base`.
// Returns SCE_GXM_ERROR_INVALID_POINTER if `base` or `offset` is null, 0 otherwise.
// `size` is not used.
EXPORT(int, sceGxmMapFragmentUsseMemory, Ptr<void> base, uint32_t size, uint32_t *offset) {
    TRACY_FUNC(sceGxmMapFragmentUsseMemory, base, size, offset);
    STUBBED("always return success");

    const bool has_output = offset != nullptr;
    if (!base || !has_output)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    // TODO What should this be?
    const uint32_t usse_offset = base.address();
    *offset = usse_offset;

    return 0;
}

// Registers the guest range [base, base + size) as GPU-mapped memory.
//
// The range is widened to 4 KiB boundaries before being stored. Returns 0 on success,
// or SCE_GXM_ERROR_INVALID_POINTER when `base` is null, when a region is already
// registered at exactly the same (aligned) base, or when the range overlaps an existing
// region while the renderer uses memory mapping.
EXPORT(int, sceGxmMapMemory, Ptr<void> base, uint32_t size, uint32_t attribs) {
    TRACY_FUNC(sceGxmMapMemory, base, size, attribs);
    if (!base) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if ((base.address() % KiB(4) != 0) || (size % KiB(4) != 0))
        LOG_WARN_ONCE("Mapping unaligned GPU memory");

    // Make sure the base address and size are 4KiB-aligned
    Address aligned_base = align_down(base.address(), KiB(4));
    size = align(base.address() + size, KiB(4)) - aligned_base;

    // Check if it has already been mapped
    // Some games intentionally overlapping mapped region. Nothing we can do. Allow it, bear your own consequences.
    GxmState &gxm = emuenv.gxm;
    auto ite = gxm.memory_mapped_regions.lower_bound(aligned_base);
    if ((ite == gxm.memory_mapped_regions.end()) || (ite->first != aligned_base)) {
        // lower_bound() on this map is treated as "the region starting at or below the
        // address", the same way gxmSetUniformBuffers reads it (assumes the map is ordered
        // by descending address — TODO confirm against its declaration). The new range
        // overlaps that region when the region extends past the new base.
        if ((ite != gxm.memory_mapped_regions.end()) && ((ite->first + ite->second.size) > aligned_base)) {
            LOG_ERROR("Overlapping mapped memory detected");

            if (emuenv.renderer->features.support_memory_mapping) {
                // overlapping memory mapping is not supported
                return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
            }
        }
        gxm.memory_mapped_regions.emplace(aligned_base, MemoryMapInfo{ aligned_base, size, attribs });

        // little big planet maps regions of size 0
        if (emuenv.renderer->features.support_memory_mapping && size > 0)
            renderer::send_single_command(*emuenv.renderer, nullptr, renderer::CommandOpcode::MemoryMap, true, aligned_base, size);

        return 0;
    }

    // A region is already registered at exactly this base address
    return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
}

// Stub: no mapping is performed, `*offset` simply receives the guest address of `base`.
// Returns SCE_GXM_ERROR_INVALID_POINTER if `base` or `offset` is null, 0 otherwise.
// `size` is not used.
EXPORT(int, sceGxmMapVertexUsseMemory, Ptr<void> base, uint32_t size, uint32_t *offset) {
    TRACY_FUNC(sceGxmMapVertexUsseMemory, base, size, offset);
    STUBBED("always return success");

    const bool has_output = offset != nullptr;
    if (!base || !has_output)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    // TODO What should this be?
    const uint32_t usse_offset = base.address();
    *offset = usse_offset;

    return 0;
}

EXPORT(int, sceGxmMidSceneFlush, SceGxmContext *immediateContext, uint32_t flags, SceGxmSyncObject *vertexSyncObject, const SceGxmNotification *vertexNotification) {
    TRACY_FUNC(sceGxmMidSceneFlush, immediateContext, flags, vertexSyncObject, vertexNotification);
    if (flags != 0)
        STUBBED("Flags ignored");

    if (!immediateContext)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    if ((flags & 0xFFFFFFFE) || (immediateContext->state.type != SCE_GXM_CONTEXT_TYPE_IMMEDIATE))
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    if (vertexSyncObject != nullptr)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    if (!immediateContext->state.active)
        return RET_ERROR(SCE_GXM_ERROR_NOT_WITHIN_SCENE);

    SceGxmNotification notification = vertexNotification ? *vertexNotification : SceGxmNotification{ Ptr<uint32_t>(0), 0 };
    renderer::add_command(immediateContext->renderer.get(), renderer::CommandOpcode::MidSceneFlush, nullptr, notification);

    if (vertexNotification) {
        // this is necessary only if the program could wait for a notification
        renderer::submit_command_list(*emuenv.renderer, immediateContext->renderer.get(), immediateContext->renderer->command_list);
        renderer::reset_command_list(immediateContext->renderer->command_list);
    }

    return 0;
}

// Underscore-prefixed variant of the export: forwards all arguments to sceGxmMidSceneFlush.
EXPORT(int, _sceGxmMidSceneFlush, SceGxmContext *immediateContext, uint32_t flags, SceGxmSyncObject *vertexSyncObject, const SceGxmNotification *vertexNotification) {
    TRACY_FUNC(_sceGxmMidSceneFlush, immediateContext, flags, vertexSyncObject, vertexNotification);

    const int result = CALL_EXPORT(sceGxmMidSceneFlush, immediateContext, flags, vertexSyncObject, vertexNotification);
    return result;
}

// Blocks until the 32-bit word the notification points at equals the notification's value.
//
// The word is re-checked under the renderer's notification mutex every time its
// condition variable wakes this thread up.
// Returns SCE_GXM_ERROR_INVALID_POINTER if `notification` is null, 0 otherwise.
EXPORT(int, sceGxmNotificationWait, const SceGxmNotification *notification) {
    TRACY_FUNC(sceGxmNotificationWait, notification);
    if (notification == nullptr)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    std::uint32_t volatile *watched_word = notification->address.get(emuenv.mem);
    const std::uint32_t expected = notification->value;

    std::unique_lock<std::mutex> guard(emuenv.renderer->notification_mutex);
    while (*watched_word != expected) {
        emuenv.renderer->notification_ready.wait(guard);
    }

    return 0;
}

// Only validates its arguments: returns SCE_GXM_ERROR_INVALID_POINTER when either pointer is
// null and 0 otherwise. Nothing else is done with the surface or the sync object.
EXPORT(int, sceGxmPadHeartbeat, const SceGxmColorSurface *displaySurface, SceGxmSyncObject *displaySyncObject) {
    TRACY_FUNC(sceGxmPadHeartbeat, displaySurface, displaySyncObject);
    const bool has_all_arguments = (displaySurface != nullptr) && (displaySyncObject != nullptr);
    if (has_all_arguments)
        return 0;

    return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
}

// Not implemented: only reports the call through UNIMPLEMENTED() and returns its result.
EXPORT(int, sceGxmPadTriggerGpuPaTrace) {
    TRACY_FUNC(sceGxmPadTriggerGpuPaTrace);
    return UNIMPLEMENTED();
}

// Not implemented: only reports the call through UNIMPLEMENTED() and returns its result.
EXPORT(int, sceGxmPopUserMarker) {
    TRACY_FUNC(sceGxmPopUserMarker);
    return UNIMPLEMENTED();
}

EXPORT(int, sceGxmPrecomputedDrawInit, SceGxmPrecomputedDraw *state, Ptr<const SceGxmVertexProgram> program, Ptr<void> extra_data) {
    TRACY_FUNC(sceGxmPrecomputedDrawInit, state, program, extra_data);
    if (!state || !program || !extra_data) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (extra_data.address() & 0xF) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_ALIGNMENT);
    }

    SceGxmPrecomputedDraw new_draw;

    new_draw.program = program;

    uint16_t max_stream_index = 0;
    const auto &gxm_vertex_program = *program.get(emuenv.mem);
    for (const SceGxmVertexAttribute &attribute : gxm_vertex_program.attributes) {
        max_stream_index = std::max(attribute.streamIndex, max_stream_index);
    }

    new_draw.stream_count = max_stream_index + 1;
    new_draw.stream_data = extra_data.cast<StreamData>();

    *state = new_draw;

    return 0;
}

// Copies state->stream_count entries from stream_data into the precomputed draw's stream table.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER when state or stream_data is null, or when the draw's
// stream table does not resolve to host memory; returns 0 otherwise.
EXPORT(int, sceGxmPrecomputedDrawSetAllVertexStreams, SceGxmPrecomputedDraw *state, const Ptr<const void> *stream_data) {
    TRACY_FUNC(sceGxmPrecomputedDrawSetAllVertexStreams, state, stream_data);
    // stream_data is indexed below, so it has to be validated just like state
    if (!state || !stream_data) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    const auto state_stream_data = state->stream_data.get(emuenv.mem);
    // Same guard the SetAllUniformBuffers functions apply to their destination table
    if (!state_stream_data) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    for (int i = 0; i < state->stream_count; ++i) {
        state_stream_data[i] = stream_data[i];
    }

    return 0;
}

EXPORT(int, sceGxmPrecomputedDrawSetParams, SceGxmPrecomputedDraw *state, SceGxmPrimitiveType type, SceGxmIndexFormat index_format, Ptr<const void> index_data, uint32_t vertex_count) {
    TRACY_FUNC(sceGxmPrecomputedDrawSetParams, state, type, index_format, index_data, vertex_count);
    if (!state || !index_data) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    state->type = type;
    state->index_format = index_format;
    state->index_data = index_data;
    state->vertex_count = vertex_count;
    state->instance_count = 1;

    return 0;
}

EXPORT(int, sceGxmPrecomputedDrawSetParamsInstanced, SceGxmPrecomputedDraw *precomputedDraw, SceGxmPrimitiveType primType, SceGxmIndexFormat indexType, Ptr<const void> indexData, uint32_t indexCount, uint32_t indexWrap) {
    TRACY_FUNC(sceGxmPrecomputedDrawSetParamsInstanced, precomputedDraw, primType, indexType, indexData, indexCount, indexWrap);
    if (!precomputedDraw || !indexData) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    precomputedDraw->type = primType;
    precomputedDraw->index_format = indexType;
    precomputedDraw->index_data = indexData;
    if (indexWrap == 0) {
        precomputedDraw->vertex_count = 0;
        precomputedDraw->instance_count = 0;
    } else {
        precomputedDraw->vertex_count = indexWrap;
        precomputedDraw->instance_count = indexCount / indexWrap;
    }

    return 0;
}

EXPORT(int, sceGxmPrecomputedDrawSetVertexStream, SceGxmPrecomputedDraw *state, uint32_t streamIndex, Ptr<const void> streamData) {
    TRACY_FUNC(sceGxmPrecomputedDrawSetVertexStream, state, streamIndex, streamData);
    if (!state) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (streamIndex > (SCE_GXM_MAX_VERTEX_STREAMS - 1)) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    if (!streamData) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    const auto stream_data = state->stream_data.get(emuenv.mem);
    stream_data[streamIndex] = streamData;

    return 0;
}

// Returns the default uniform buffer stored in a precomputed fragment state.
//
// A null pointer is returned when state is null, when the state has no uniform buffers
// (buffer_count == 0, the case in which the matching setter stores nothing), or when the
// uniform buffer table does not resolve to host memory.
EXPORT(Ptr<const void>, sceGxmPrecomputedFragmentStateGetDefaultUniformBuffer, const SceGxmPrecomputedFragmentState *state) {
    TRACY_FUNC(sceGxmPrecomputedFragmentStateGetDefaultUniformBuffer, state);
    if (!state || (state->buffer_count == 0))
        return Ptr<const void>();

    UniformBuffer *uniform_buffers = state->uniform_buffers.get(emuenv.mem);
    if (!uniform_buffers)
        return Ptr<const void>();

    return uniform_buffers[SCE_GXM_DEFAULT_UNIFORM_BUFFER_CONTAINER_INDEX];
}

EXPORT(int, sceGxmPrecomputedFragmentStateInit, SceGxmPrecomputedFragmentState *state, Ptr<const SceGxmFragmentProgram> program, Ptr<void> extra_data) {
    TRACY_FUNC(sceGxmPrecomputedFragmentStateInit, state, program, extra_data);
    if (!state || !program || !extra_data) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (extra_data.address() & 0xF) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_ALIGNMENT);
    }

    SceGxmPrecomputedFragmentState new_state;
    new_state.program = program;

    auto &renderer_data = program.get(emuenv.mem)->renderer_data;
    new_state.texture_count = renderer_data->texture_count;
    new_state.buffer_count = renderer_data->buffer_count;

    new_state.textures = extra_data.cast<TextureData>();
    new_state.uniform_buffers = (extra_data.cast<TextureData>() + new_state.texture_count).cast<UniformBuffer>();

    *state = new_state;

    return 0;
}

// Not implemented: only reports the call through UNIMPLEMENTED() and returns its result.
EXPORT(int, sceGxmPrecomputedFragmentStateSetAllAuxiliarySurfaces) {
    TRACY_FUNC(sceGxmPrecomputedFragmentStateSetAllAuxiliarySurfaces);
    return UNIMPLEMENTED();
}

// Copies state->texture_count textures from the guest array `textures` into the texture table of
// a precomputed fragment state.
// Returns SCE_GXM_ERROR_INVALID_POINTER when state or textures is null, 0 otherwise.
EXPORT(int, sceGxmPrecomputedFragmentStateSetAllTextures, SceGxmPrecomputedFragmentState *state, Ptr<const SceGxmTexture> textures) {
    TRACY_FUNC(sceGxmPrecomputedFragmentStateSetAllTextures, state, textures);
    if (!state || !textures)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    const auto destination = state->textures.get(emuenv.mem);
    const auto source = textures.get(emuenv.mem);
    for (int slot = 0; slot < state->texture_count; ++slot)
        destination[slot] = source[slot];

    return 0;
}

EXPORT(int, sceGxmPrecomputedFragmentStateSetAllUniformBuffers, SceGxmPrecomputedFragmentState *precomputedState, Ptr<const void> const *bufferDataArray) {
    TRACY_FUNC(sceGxmPrecomputedFragmentStateSetAllUniformBuffers, precomputedState, bufferDataArray);
    if (!precomputedState || !precomputedState->uniform_buffers || !bufferDataArray)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    UniformBuffer *uniform_buffers = precomputedState->uniform_buffers.get(emuenv.mem);
    if (!uniform_buffers)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    for (int i = 0; i + SCE_GXM_UNIFORM_BUFFER_OFFSET < precomputedState->buffer_count; i++)
        uniform_buffers[i + SCE_GXM_UNIFORM_BUFFER_OFFSET] = bufferDataArray[i];

    return 0;
}

EXPORT(int, sceGxmPrecomputedFragmentStateSetDefaultUniformBuffer, SceGxmPrecomputedFragmentState *state, Ptr<const void> buffer) {
    TRACY_FUNC(sceGxmPrecomputedFragmentStateSetDefaultUniformBuffer, state, buffer);
    if (!state || !buffer) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (state->buffer_count == 0)
        return 0;

    UniformBuffer *uniform_buffers = state->uniform_buffers.get(emuenv.mem);
    uniform_buffers[SCE_GXM_DEFAULT_UNIFORM_BUFFER_CONTAINER_INDEX] = buffer;

    return 0;
}

// Stores *texture in slot `index` of a precomputed fragment state's texture table.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER when state or texture is null and
// SCE_GXM_ERROR_INVALID_VALUE when index is not below SCE_GXM_MAX_TEXTURE_UNITS.
// An index at or above state->texture_count is accepted but ignored.
EXPORT(int, sceGxmPrecomputedFragmentStateSetTexture, SceGxmPrecomputedFragmentState *state, uint32_t index, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmPrecomputedFragmentStateSetTexture, state, index, texture);
    if (state == nullptr)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    if (index >= SCE_GXM_MAX_TEXTURE_UNITS)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    if (texture == nullptr)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    // Slots the state does not have are silently skipped
    if (index >= state->texture_count)
        return 0;

    const auto slots = state->textures.get(emuenv.mem);
    slots[index] = *texture;
    return 0;
}

EXPORT(int, sceGxmPrecomputedFragmentStateSetUniformBuffer, SceGxmPrecomputedFragmentState *precomputedState, uint32_t bufferIndex, Ptr<const void> bufferData) {
    TRACY_FUNC(sceGxmPrecomputedFragmentStateSetUniformBuffer, precomputedState, bufferIndex, bufferData);
    if (!precomputedState) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (bufferIndex >= SCE_GXM_MAX_UNIFORM_BUFFERS) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    if (!bufferData) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (bufferIndex + SCE_GXM_UNIFORM_BUFFER_OFFSET < precomputedState->buffer_count) {
        UniformBuffer *uniform_buffers = precomputedState->uniform_buffers.get(emuenv.mem);
        uniform_buffers[bufferIndex + SCE_GXM_UNIFORM_BUFFER_OFFSET] = bufferData;
    }

    return 0;
}

// Returns the default uniform buffer stored in a precomputed vertex state.
//
// A null pointer is returned when state is null, when the state has no uniform buffers
// (buffer_count == 0, the case in which the matching setter stores nothing), or when the
// uniform buffer table does not resolve to host memory.
EXPORT(Ptr<const void>, sceGxmPrecomputedVertexStateGetDefaultUniformBuffer, SceGxmPrecomputedVertexState *state) {
    TRACY_FUNC(sceGxmPrecomputedVertexStateGetDefaultUniformBuffer, state);
    if (!state || (state->buffer_count == 0))
        return Ptr<const void>();

    UniformBuffer *uniform_buffers = state->uniform_buffers.get(emuenv.mem);
    if (!uniform_buffers)
        return Ptr<const void>();

    return uniform_buffers[SCE_GXM_DEFAULT_UNIFORM_BUFFER_CONTAINER_INDEX];
}

EXPORT(int, sceGxmPrecomputedVertexStateInit, SceGxmPrecomputedVertexState *state, Ptr<const SceGxmVertexProgram> program, Ptr<void> extra_data) {
    TRACY_FUNC(sceGxmPrecomputedVertexStateInit, state, program, extra_data);
    if (!state || !program || !extra_data) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (extra_data.address() & 0xF) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_ALIGNMENT);
    }

    SceGxmPrecomputedVertexState new_state;
    new_state.program = program;

    auto &renderer_data = program.get(emuenv.mem)->renderer_data;
    new_state.texture_count = renderer_data->texture_count;
    new_state.buffer_count = renderer_data->buffer_count;

    new_state.textures = extra_data.cast<TextureData>();
    new_state.uniform_buffers = (extra_data.cast<TextureData>() + new_state.texture_count).cast<UniformBuffer>();

    *state = new_state;

    return 0;
}

// Copies precomputedState->texture_count textures from the guest array textureArray into the
// texture table of a precomputed vertex state.
// Returns SCE_GXM_ERROR_INVALID_POINTER when precomputedState or textureArray is null, 0 otherwise.
EXPORT(int, sceGxmPrecomputedVertexStateSetAllTextures, SceGxmPrecomputedVertexState *precomputedState, Ptr<const SceGxmTexture> textureArray) {
    TRACY_FUNC(sceGxmPrecomputedVertexStateSetAllTextures, precomputedState, textureArray);
    if (!precomputedState || !textureArray)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    const auto destination = precomputedState->textures.get(emuenv.mem);
    const auto source = textureArray.get(emuenv.mem);
    for (int slot = 0; slot < precomputedState->texture_count; ++slot)
        destination[slot] = source[slot];

    return 0;
}

EXPORT(int, sceGxmPrecomputedVertexStateSetAllUniformBuffers, SceGxmPrecomputedVertexState *precomputedState, Ptr<const void> const *bufferDataArray) {
    TRACY_FUNC(sceGxmPrecomputedVertexStateSetAllUniformBuffers, precomputedState, bufferDataArray);
    if (!precomputedState || !precomputedState->uniform_buffers || !bufferDataArray)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    UniformBuffer *uniform_buffers = precomputedState->uniform_buffers.get(emuenv.mem);
    if (!uniform_buffers)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    for (int i = 0; i + SCE_GXM_UNIFORM_BUFFER_OFFSET < precomputedState->buffer_count; i++)
        uniform_buffers[i + SCE_GXM_UNIFORM_BUFFER_OFFSET] = bufferDataArray[i];

    return 0;
}

EXPORT(int, sceGxmPrecomputedVertexStateSetDefaultUniformBuffer, SceGxmPrecomputedVertexState *state, Ptr<const void> buffer) {
    TRACY_FUNC(sceGxmPrecomputedVertexStateSetDefaultUniformBuffer, state, buffer);
    if (!state || !buffer) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (state->buffer_count == 0)
        return 0;

    UniformBuffer *uniform_buffers = state->uniform_buffers.get(emuenv.mem);
    uniform_buffers[SCE_GXM_DEFAULT_UNIFORM_BUFFER_CONTAINER_INDEX] = buffer;

    return 0;
}

// Stores *texture in slot textureIndex of a precomputed vertex state's texture table.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER when precomputedState or texture is null and
// SCE_GXM_ERROR_INVALID_VALUE when textureIndex is not below SCE_GXM_MAX_TEXTURE_UNITS.
// An index at or above the state's texture_count is accepted but ignored.
EXPORT(int, sceGxmPrecomputedVertexStateSetTexture, SceGxmPrecomputedVertexState *precomputedState, uint32_t textureIndex, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmPrecomputedVertexStateSetTexture, precomputedState, textureIndex, texture);
    if (precomputedState == nullptr)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    if (textureIndex >= SCE_GXM_MAX_TEXTURE_UNITS)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    if (texture == nullptr)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    // Slots the state does not have are silently skipped
    if (textureIndex >= precomputedState->texture_count)
        return 0;

    const auto slots = precomputedState->textures.get(emuenv.mem);
    slots[textureIndex] = *texture;
    return 0;
}

EXPORT(int, sceGxmPrecomputedVertexStateSetUniformBuffer, SceGxmPrecomputedVertexState *precomputedState, uint32_t bufferIndex, Ptr<const void> bufferData) {
    TRACY_FUNC(sceGxmPrecomputedVertexStateSetUniformBuffer, precomputedState, bufferIndex, bufferData);
    if (!precomputedState) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (bufferIndex >= SCE_GXM_MAX_UNIFORM_BUFFERS) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    if (!bufferData) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (bufferIndex + SCE_GXM_UNIFORM_BUFFER_OFFSET < precomputedState->buffer_count) {
        UniformBuffer *uniform_buffers = precomputedState->uniform_buffers.get(emuenv.mem);
        uniform_buffers[bufferIndex + SCE_GXM_UNIFORM_BUFFER_OFFSET] = bufferData;
    }

    return 0;
}

// Checks that the first four bytes at program->magic are 'G', 'X', 'P' and a NUL byte.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null program, SCE_GXM_ERROR_NULL_PROGRAM when the
// magic does not match, 0 otherwise.
EXPORT(int, sceGxmProgramCheck, const SceGxmProgram *program) {
    TRACY_FUNC(sceGxmProgramCheck, program);
    if (program == nullptr)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    // "GXP" including its terminating NUL, i.e. exactly four bytes
    static constexpr char expected_magic[4] = { 'G', 'X', 'P', '\0' };
    const bool magic_matches = memcmp(&program->magic, expected_magic, sizeof(expected_magic)) == 0;
    if (!magic_matches)
        return RET_ERROR(SCE_GXM_ERROR_NULL_PROGRAM);

    return 0;
}

// Looks up the first program parameter whose name equals `name` (compared with strcmp).
// Returns a guest pointer to that parameter, computed as the program's guest address plus the
// parameter's host-side byte offset from the start of the program. Returns a null pointer when
// program_ptr or name is null or when no parameter matches.
EXPORT(Ptr<SceGxmProgramParameter>, sceGxmProgramFindParameterByName, Ptr<const SceGxmProgram> program_ptr, const char *name) {
    TRACY_FUNC(sceGxmProgramFindParameterByName, program_ptr, name);
    if (!program_ptr || !name)
        return Ptr<SceGxmProgramParameter>();

    const auto host_program = program_ptr.get(emuenv.mem);
    const auto table = host_program->program_parameters();
    for (uint32_t slot = 0; slot < host_program->parameter_count; ++slot) {
        const auto candidate = &table[slot];
        if (strcmp(candidate->name(), name) != 0)
            continue;

        const int offset = int(size_t(candidate) - size_t(host_program));
        return Ptr<SceGxmProgramParameter>(program_ptr.address() + offset);
    }
    return Ptr<SceGxmProgramParameter>();
}

// Looks up the first program parameter whose semantic and semantic_index equal the given values.
// Returns a guest pointer to that parameter, computed as the program's guest address plus the
// parameter's host-side byte offset from the start of the program. Returns a null pointer when
// semantic is SCE_GXM_PARAMETER_SEMANTIC_NONE, when program_ptr is null (which also trips the
// assert) or when no parameter matches.
EXPORT(Ptr<SceGxmProgramParameter>, sceGxmProgramFindParameterBySemantic, Ptr<const SceGxmProgram> program_ptr, SceGxmParameterSemantic semantic, uint32_t index) {
    TRACY_FUNC(sceGxmProgramFindParameterBySemantic, program_ptr, semantic, index);

    if (semantic == SCE_GXM_PARAMETER_SEMANTIC_NONE)
        return Ptr<SceGxmProgramParameter>();

    assert(program_ptr);
    if (!program_ptr)
        return Ptr<SceGxmProgramParameter>();

    const auto host_program = program_ptr.get(emuenv.mem);
    const auto table = host_program->program_parameters();
    for (uint32_t slot = 0; slot < host_program->parameter_count; ++slot) {
        const auto candidate = &table[slot];
        const bool matches = (candidate->semantic == semantic) && (candidate->semantic_index == index);
        if (!matches)
            continue;

        const int offset = int(size_t(candidate) - size_t(host_program));
        return Ptr<SceGxmProgramParameter>(program_ptr.address() + offset);
    }

    return Ptr<SceGxmProgramParameter>();
}

// Underscore-prefixed export: forwards all arguments unchanged to
// sceGxmProgramFindParameterBySemantic through CALL_EXPORT and returns its result.
EXPORT(Ptr<SceGxmProgramParameter>, _sceGxmProgramFindParameterBySemantic, Ptr<const SceGxmProgram> program_ptr, SceGxmParameterSemantic semantic, uint32_t index) {
    TRACY_FUNC(_sceGxmProgramFindParameterBySemantic, program_ptr, semantic, index);
    return CALL_EXPORT(sceGxmProgramFindParameterBySemantic, program_ptr, semantic, index);
}

// Returns program->default_uniform_buffer_count multiplied by sizeof(Ptr<const void>).
// program is dereferenced without any null check.
EXPORT(uint32_t, sceGxmProgramGetDefaultUniformBufferSize, const SceGxmProgram *program) {
    TRACY_FUNC(sceGxmProgramGetDefaultUniformBufferSize, program);
    return program->default_uniform_buffer_count * sizeof(Ptr<const void>);
}

// Returns the result of gxp::get_fragment_inputs for the program, converted to uint32_t.
// The guest pointer is resolved and dereferenced without a null check.
EXPORT(uint32_t, sceGxmProgramGetFragmentProgramInputs, Ptr<const SceGxmProgram> program_) {
    TRACY_FUNC(sceGxmProgramGetFragmentProgramInputs, program_);
    const auto program = program_.get(emuenv.mem);

    return static_cast<uint32_t>(gxp::get_fragment_inputs(*program));
}

// Writes the fragment output type and component count of a fragment program to *type and
// *componentCount.
// Returns SCE_GXM_ERROR_INVALID_POINTER when any argument is null,
// SCE_GXM_ERROR_INVALID_VALUE when the program is not a fragment program, 0 otherwise.
EXPORT(int, sceGxmProgramGetOutputRegisterFormat, const SceGxmProgram *program, SceGxmParameterType *type, uint32_t *componentCount) {
    TRACY_FUNC(sceGxmProgramGetOutputRegisterFormat, program, type, componentCount);
    const bool has_all_arguments = program && type && componentCount;
    if (!has_all_arguments)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    if (program->is_fragment()) {
        *type = program->get_fragment_output_type();
        *componentCount = program->get_fragment_output_component_count();
        return 0;
    }

    return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
}

// Returns a guest pointer to the parameter at position `index` in the program's parameter table,
// computed as the program's guest address plus the parameter's host-side byte offset from the
// start of the program. Returns a null pointer when program_ptr is null or when index is not
// below the program's parameter_count.
EXPORT(Ptr<SceGxmProgramParameter>, sceGxmProgramGetParameter, Ptr<const SceGxmProgram> program_ptr, uint32_t index) {
    TRACY_FUNC(sceGxmProgramGetParameter, program_ptr, index);

    // A null guest pointer would otherwise be dereferenced on the host below; the
    // sceGxmProgramFindParameter* functions return a null pointer in this case as well.
    if (!program_ptr)
        return Ptr<SceGxmProgramParameter>();

    auto program = program_ptr.get(emuenv.mem);
    if (index >= program->parameter_count)
        return Ptr<SceGxmProgramParameter>(0);
    auto parameters = program->program_parameters();
    auto parameter = &parameters[index];
    return Ptr<SceGxmProgramParameter>(program_ptr.address() + int(size_t(parameter) - size_t(program)));
}

// Returns program->parameter_count. A null program is only guarded by the assert.
EXPORT(uint32_t, sceGxmProgramGetParameterCount, const SceGxmProgram *program) {
    TRACY_FUNC(sceGxmProgramGetParameterCount, program);
    assert(program);
    return program->parameter_count;
}

// Returns program->size. A null program is only guarded by the assert.
EXPORT(uint32_t, sceGxmProgramGetSize, const SceGxmProgram *program) {
    TRACY_FUNC(sceGxmProgramGetSize, program);
    assert(program);
    return program->size;
}

// Returns program->get_type(). A null program is only guarded by the assert.
EXPORT(SceGxmProgramType, sceGxmProgramGetType, const SceGxmProgram *program) {
    TRACY_FUNC(sceGxmProgramGetType, program);
    assert(program);
    return program->get_type();
}

// Returns the result of gxp::get_vertex_outputs for the program, converted to uint32_t.
// The guest pointer is resolved and dereferenced without a null check.
EXPORT(uint32_t, sceGxmProgramGetVertexProgramOutputs, Ptr<const SceGxmProgram> program_) {
    TRACY_FUNC(sceGxmProgramGetVertexProgramOutputs, program_);
    const auto program = program_.get(emuenv.mem);

    return static_cast<uint32_t>(gxp::get_vertex_outputs(*program));
}

// Returns program->is_depth_replace_used(). A null program is only guarded by the assert.
EXPORT(bool, sceGxmProgramIsDepthReplaceUsed, const SceGxmProgram *program) {
    TRACY_FUNC(sceGxmProgramIsDepthReplaceUsed, program);
    assert(program);
    return program->is_depth_replace_used();
}

// Returns program->is_discard_used(). A null program is only guarded by the assert.
EXPORT(bool, sceGxmProgramIsDiscardUsed, const SceGxmProgram *program) {
    TRACY_FUNC(sceGxmProgramIsDiscardUsed, program);
    assert(program);
    return program->is_discard_used();
}

// Reports whether two programs have the same size field and identical bytes over that size.
// When either pointer is null an error is logged and false is returned.
EXPORT(bool, sceGxmProgramIsEquivalent, const SceGxmProgram *programA, const SceGxmProgram *programB) {
    TRACY_FUNC(sceGxmProgramIsEquivalent, programA, programB);
    if ((programA == nullptr) || (programB == nullptr)) {
        LOG_ERROR("SCE_GXM_ERROR_INVALID_POINTER");
        return false;
    }

    // Programs of different sizes can never be byte-identical, so skip the comparison
    if (programA->size != programB->size)
        return false;

    return memcmp(programA, programB, programA->size) == 0;
}

// Returns program->is_frag_color_used(). A null program is only guarded by the assert.
EXPORT(bool, sceGxmProgramIsFragColorUsed, const SceGxmProgram *program) {
    TRACY_FUNC(sceGxmProgramIsFragColorUsed, program);
    assert(program);
    return program->is_frag_color_used();
}

// Returns program->is_native_color(). A null program is only guarded by the assert.
EXPORT(bool, sceGxmProgramIsNativeColorUsed, const SceGxmProgram *program) {
    TRACY_FUNC(sceGxmProgramIsNativeColorUsed, program);
    assert(program);
    return program->is_native_color();
}

// Returns program->is_sprite_coord_used(). A null program is only guarded by the assert.
EXPORT(bool, sceGxmProgramIsSpriteCoordUsed, const SceGxmProgram *program) {
    TRACY_FUNC(sceGxmProgramIsSpriteCoordUsed, program);
    assert(program);
    return program->is_sprite_coord_used();
}

// Returns parameter->array_size. A null parameter is only guarded by the assert.
EXPORT(uint32_t, sceGxmProgramParameterGetArraySize, const SceGxmProgramParameter *parameter) {
    TRACY_FUNC(sceGxmProgramParameterGetArraySize, parameter);
    assert(parameter);
    return parameter->array_size;
}

// Returns parameter->category as an int. A null parameter is only guarded by the assert.
EXPORT(int, sceGxmProgramParameterGetCategory, const SceGxmProgramParameter *parameter) {
    TRACY_FUNC(sceGxmProgramParameterGetCategory, parameter);
    assert(parameter);
    return parameter->category;
}

// Returns parameter->component_count. A null parameter is only guarded by the assert.
EXPORT(uint32_t, sceGxmProgramParameterGetComponentCount, const SceGxmProgramParameter *parameter) {
    TRACY_FUNC(sceGxmProgramParameterGetComponentCount, parameter);
    assert(parameter);
    return parameter->component_count;
}

// Returns parameter->container_index. A null parameter is only guarded by the assert.
EXPORT(uint32_t, sceGxmProgramParameterGetContainerIndex, const SceGxmProgramParameter *parameter) {
    TRACY_FUNC(sceGxmProgramParameterGetContainerIndex, parameter);
    assert(parameter);
    return parameter->container_index;
}

// Returns the position of `parameter` within the program's parameter table, computed as the
// pointer difference from the first table entry. Returns 0 when the program reports no
// parameter table. Neither argument is null-checked.
EXPORT(uint32_t, sceGxmProgramParameterGetIndex, const SceGxmProgram *program, const SceGxmProgramParameter *parameter) {
    TRACY_FUNC(sceGxmProgramParameterGetIndex, program, parameter);
    const auto first_parameter = program->program_parameters();
    if (!first_parameter)
        return 0;

    return static_cast<uint32_t>(parameter - first_parameter);
}

// Returns a guest pointer to the parameter's name, located name_offset bytes after the guest
// address of the parameter itself. Returns a null pointer for a null parameter.
EXPORT(Ptr<const char>, sceGxmProgramParameterGetName, Ptr<const SceGxmProgramParameter> parameter) {
    TRACY_FUNC(sceGxmProgramParameterGetName, parameter);
    if (parameter) {
        const auto host_parameter = parameter.get(emuenv.mem);
        return Ptr<const char>(parameter.address() + host_parameter->name_offset);
    }
    return {};
}

// Returns parameter->resource_index. A null parameter is only guarded by the assert.
EXPORT(uint32_t, sceGxmProgramParameterGetResourceIndex, const SceGxmProgramParameter *parameter) {
    TRACY_FUNC(sceGxmProgramParameterGetResourceIndex, parameter);
    assert(parameter);
    return parameter->resource_index;
}

// Returns the semantic of an attribute parameter. Parameters of any other category yield
// SCE_GXM_PARAMETER_SEMANTIC_NONE. A null parameter is only guarded by the assert.
EXPORT(int, sceGxmProgramParameterGetSemantic, const SceGxmProgramParameter *parameter) {
    TRACY_FUNC(sceGxmProgramParameterGetSemantic, parameter);
    assert(parameter);

    if (parameter->category == SCE_GXM_PARAMETER_CATEGORY_ATTRIBUTE)
        return parameter->semantic;

    return SCE_GXM_PARAMETER_SEMANTIC_NONE;
}

// Underscore-prefixed export: forwards the parameter unchanged to
// sceGxmProgramParameterGetSemantic through CALL_EXPORT and returns its result.
EXPORT(int, _sceGxmProgramParameterGetSemantic, const SceGxmProgramParameter *parameter) {
    TRACY_FUNC(_sceGxmProgramParameterGetSemantic, parameter);
    return CALL_EXPORT(sceGxmProgramParameterGetSemantic, parameter);
}

// Returns the low four bits of parameter->semantic_index.
// parameter is dereferenced without any null check.
EXPORT(uint32_t, sceGxmProgramParameterGetSemanticIndex, const SceGxmProgramParameter *parameter) {
    TRACY_FUNC(sceGxmProgramParameterGetSemanticIndex, parameter);
    return parameter->semantic_index & 0xf;
}

// Returns parameter->type as an int. parameter is dereferenced without any null check.
EXPORT(int, sceGxmProgramParameterGetType, const SceGxmProgramParameter *parameter) {
    TRACY_FUNC(sceGxmProgramParameterGetType, parameter);
    return parameter->type;
}

// Reports whether the bit numbered parameter->resource_index is set in the untyped_pa_regs mask
// of the program's vertex varyings. Always false for fragment programs and for parameters whose
// category is not SCE_GXM_PARAMETER_CATEGORY_ATTRIBUTE. Neither argument is null-checked.
EXPORT(bool, sceGxmProgramParameterIsRegFormat, const SceGxmProgram *program, const SceGxmProgramParameter *parameter) {
    TRACY_FUNC(sceGxmProgramParameterIsRegFormat, program, parameter);
    const bool not_applicable = program->is_fragment()
        || (parameter->category != SceGxmParameterCategory::SCE_GXM_PARAMETER_CATEGORY_ATTRIBUTE);
    if (not_applicable)
        return false;

    const uint64_t untyped_mask = program->vertex_varyings()->untyped_pa_regs;
    const uint64_t parameter_bit = 1ULL << parameter->resource_index;
    return (untyped_mask & parameter_bit) != 0;
}

// Returns parameter->is_sampler_cube(). parameter is dereferenced without any null check.
EXPORT(bool, sceGxmProgramParameterIsSamplerCube, const SceGxmProgramParameter *parameter) {
    TRACY_FUNC(sceGxmProgramParameterIsSamplerCube, parameter);
    return parameter->is_sampler_cube();
}

// Not implemented: only reports the call through UNIMPLEMENTED() and returns its result.
EXPORT(int, sceGxmPushUserMarker) {
    TRACY_FUNC(sceGxmPushUserMarker);
    return UNIMPLEMENTED();
}

// Not implemented: only reports the call through UNIMPLEMENTED() and returns its result.
EXPORT(int, sceGxmRemoveRazorGpuCaptureBuffer) {
    TRACY_FUNC(sceGxmRemoveRazorGpuCaptureBuffer);
    return UNIMPLEMENTED();
}

// Copies renderTarget->driverMemBlock into *driverMemBlock.
// Returns SCE_GXM_ERROR_INVALID_POINTER when either pointer is null, 0 otherwise.
EXPORT(int, sceGxmRenderTargetGetDriverMemBlock, const SceGxmRenderTarget *renderTarget, SceUID *driverMemBlock) {
    TRACY_FUNC(sceGxmRenderTargetGetDriverMemBlock, renderTarget, driverMemBlock);
    if ((renderTarget == nullptr) || (driverMemBlock == nullptr))
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    *driverMemBlock = renderTarget->driverMemBlock;
    return 0;
}

// Reserves room for the default uniform buffer of the context's current fragment program inside
// the context's fragment ring buffer, writes the resulting guest pointer to *uniformBuffer and
// records it as the context's default fragment uniform buffer.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER when context or uniformBuffer is null,
// SCE_GXM_ERROR_RESERVE_FAILED when a non-immediate context gets a null ring buffer back from
// gxmRunDeferredMemoryCallback, and 0 otherwise.
EXPORT(int, sceGxmReserveFragmentDefaultUniformBuffer, SceGxmContext *context, Ptr<void> *uniformBuffer) {
    TRACY_FUNC(sceGxmReserveFragmentDefaultUniformBuffer, context, uniformBuffer);
    if (!context || !uniformBuffer)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    // NOTE(review): the current fragment program is resolved and dereferenced without a null check — confirm one is always set before this is called
    const auto fragment_program = context->state.fragment_program.get(emuenv.mem);
    const auto program = fragment_program->program.get(emuenv.mem);

    // size in bytes: the default uniform buffer count scaled by 4
    const size_t size = (size_t)program->default_uniform_buffer_count * 4;
    // data for the ring buffer must be 4 bytes aligned
    context->state.fragment_ring_buffer_used = align(context->state.fragment_ring_buffer_used, 4);
    const size_t next_used = context->state.fragment_ring_buffer_used + size;

    if (size == 0) {
        // nothing to reserve: hand back, and record, a null pointer
        *uniformBuffer = Ptr<void>();
        context->state.fragment_uniform_buffers[SCE_GXM_DEFAULT_UNIFORM_BUFFER_CONTAINER_INDEX] = *uniformBuffer;

        return 0;
    }

    if (next_used > context->state.fragment_ring_buffer_size) {
        // the reservation does not fit in what is left of the ring buffer
        if (context->state.type != SCE_GXM_CONTEXT_TYPE_IMMEDIATE) {
            // the value returned for the context's fragment memory callback becomes the new ring buffer
            context->state.fragment_ring_buffer = gxmRunDeferredMemoryCallback(emuenv.kernel, emuenv.mem, emuenv.gxm.callback_lock, context->state.fragment_ring_buffer_size,
                context->state.fragment_memory_callback, context->state.memory_callback_userdata, DEFAULT_RING_SIZE, thread_id);

            if (!context->state.fragment_ring_buffer) {
                return RET_ERROR(SCE_GXM_ERROR_RESERVE_FAILED);
            }
        }

        // restart from the beginning of the ring buffer
        context->state.fragment_ring_buffer_used = 0;
    }

    // NOTE(review): fragment_ring_buffer_used is not advanced by size here; presumably whoever
    // consumes was_frag_default_uniform_reserved does that — confirm
    *uniformBuffer = context->state.fragment_ring_buffer.cast<uint8_t>() + static_cast<int32_t>(context->state.fragment_ring_buffer_used);
    context->was_frag_default_uniform_reserved = true;
    context->state.fragment_uniform_buffers[SCE_GXM_DEFAULT_UNIFORM_BUFFER_CONTAINER_INDEX] = *uniformBuffer;

    return 0;
}

// Not implemented: only reports the call through UNIMPLEMENTED() and returns its result.
EXPORT(int, sceGxmRenderTargetGetHostMem) {
    TRACY_FUNC(sceGxmRenderTargetGetHostMem);
    return UNIMPLEMENTED();
}

// Reserves room for the default uniform buffer of the context's current vertex program inside
// the context's vertex ring buffer, writes the resulting guest pointer to *uniformBuffer and
// records it as the context's default vertex uniform buffer.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER when context or uniformBuffer is null,
// SCE_GXM_ERROR_RESERVE_FAILED when a non-immediate context gets a null ring buffer back from
// gxmRunDeferredMemoryCallback, and 0 otherwise.
EXPORT(int, sceGxmReserveVertexDefaultUniformBuffer, SceGxmContext *context, Ptr<void> *uniformBuffer) {
    TRACY_FUNC(sceGxmReserveVertexDefaultUniformBuffer, context, uniformBuffer);
    if (!context || !uniformBuffer)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    // NOTE(review): the current vertex program is resolved and dereferenced without a null check — confirm one is always set before this is called
    const auto vertex_program = context->state.vertex_program.get(emuenv.mem);
    const auto program = vertex_program->program.get(emuenv.mem);

    // size in bytes: the default uniform buffer count scaled by 4
    const size_t size = (size_t)program->default_uniform_buffer_count * 4;
    // data for the ring buffer must be 4 bytes aligned
    context->state.vertex_ring_buffer_used = align(context->state.vertex_ring_buffer_used, 4);
    const size_t next_used = context->state.vertex_ring_buffer_used + size;

    if (size == 0) {
        // nothing to reserve: hand back, and record, a null pointer
        *uniformBuffer = Ptr<void>();
        context->state.vertex_uniform_buffers[SCE_GXM_DEFAULT_UNIFORM_BUFFER_CONTAINER_INDEX] = *uniformBuffer;

        return 0;
    }

    if (next_used > context->state.vertex_ring_buffer_size) {
        // the reservation does not fit in what is left of the ring buffer
        if (context->state.type != SCE_GXM_CONTEXT_TYPE_IMMEDIATE) {
            // the value returned for the context's vertex memory callback becomes the new ring buffer
            context->state.vertex_ring_buffer = gxmRunDeferredMemoryCallback(emuenv.kernel, emuenv.mem, emuenv.gxm.callback_lock, context->state.vertex_ring_buffer_size,
                context->state.vertex_memory_callback, context->state.memory_callback_userdata, DEFAULT_RING_SIZE, thread_id);

            if (!context->state.vertex_ring_buffer) {
                return RET_ERROR(SCE_GXM_ERROR_RESERVE_FAILED);
            }
        }

        // restart from the beginning of the ring buffer
        context->state.vertex_ring_buffer_used = 0;
    }

    // NOTE(review): vertex_ring_buffer_used is not advanced by size here; presumably whoever
    // consumes was_vert_default_uniform_reserved does that — confirm
    *uniformBuffer = context->state.vertex_ring_buffer.cast<uint8_t>() + static_cast<int32_t>(context->state.vertex_ring_buffer_used);
    context->was_vert_default_uniform_reserved = true;
    context->state.vertex_uniform_buffers[SCE_GXM_DEFAULT_UNIFORM_BUFFER_CONTAINER_INDEX] = *uniformBuffer;

    return 0;
}

// Not implemented: only reports the call through UNIMPLEMENTED() and returns its result.
EXPORT(int, sceGxmSetAuxiliarySurface) {
    TRACY_FUNC(sceGxmSetAuxiliarySurface);
    return UNIMPLEMENTED();
}

EXPORT(void, sceGxmSetBackDepthBias, SceGxmContext *context, int32_t factor, int32_t units) {
    TRACY_FUNC(sceGxmSetBackDepthBias, context, factor, units);
    if ((context->state.back_depth_bias_factor != factor) || (context->state.back_depth_bias_units != units)) {
        context->state.back_depth_bias_factor = factor;
        context->state.back_depth_bias_units = units;

        if (context->alloc_space) {
            renderer::set_depth_bias(*emuenv.renderer, context->renderer.get(), false, factor, units);
        }
    }
}

EXPORT(void, sceGxmSetBackDepthFunc, SceGxmContext *context, SceGxmDepthFunc depthFunc) {
    TRACY_FUNC(sceGxmSetBackDepthFunc, context, depthFunc);
    if (context->state.back_depth_func != depthFunc) {
        context->state.back_depth_func = depthFunc;

        if (context->alloc_space) {
            renderer::set_depth_func(*emuenv.renderer, context->renderer.get(), false, depthFunc);
        }
    }
}

EXPORT(void, sceGxmSetBackDepthWriteEnable, SceGxmContext *context, SceGxmDepthWriteMode enable) {
    TRACY_FUNC(sceGxmSetBackDepthWriteEnable, context, enable);
    if (context->state.back_depth_write_enable != enable) {
        context->state.back_depth_write_enable = enable;

        if (context->alloc_space) {
            renderer::set_depth_write_enable_mode(*emuenv.renderer, context->renderer.get(), false, enable);
        }
    }
}

// Forwards the mode to renderer::set_side_fragment_program_enable, passing `false` in the same
// position the other sceGxmSetBack* setters in this file do.
// NOTE(review): unlike those setters, nothing is cached in context->state and
// context->alloc_space is not checked before calling the renderer — confirm this is intended.
EXPORT(void, sceGxmSetBackFragmentProgramEnable, SceGxmContext *context, SceGxmFragmentProgramMode enable) {
    TRACY_FUNC(sceGxmSetBackFragmentProgramEnable, context, enable);
    renderer::set_side_fragment_program_enable(*emuenv.renderer, context->renderer.get(), false, enable);
}

// Not implemented: the arguments are only passed to TRACY_FUNC and the call is reported through
// UNIMPLEMENTED().
EXPORT(void, sceGxmSetBackLineFillLastPixelEnable, SceGxmContext *context, SceGxmLineFillLastPixelMode enable) {
    TRACY_FUNC(sceGxmSetBackLineFillLastPixelEnable, context, enable);
    UNIMPLEMENTED();
}

EXPORT(void, sceGxmSetBackPointLineWidth, SceGxmContext *context, uint32_t width) {
    TRACY_FUNC(sceGxmSetBackPointLineWidth, context, width);
    if (context->state.back_point_line_width != width) {
        context->state.back_point_line_width = width;

        if (context->alloc_space) {
            renderer::set_point_line_width(*emuenv.renderer, context->renderer.get(), false, width);
        }
    }
}

EXPORT(void, sceGxmSetBackPolygonMode, SceGxmContext *context, SceGxmPolygonMode mode) {
    TRACY_FUNC(sceGxmSetBackPolygonMode, context, mode);
    if (context->state.back_polygon_mode != mode) {
        context->state.back_polygon_mode = mode;

        if (context->alloc_space) {
            renderer::set_polygon_mode(*emuenv.renderer, context->renderer.get(), false, mode);
        }
    }
}

EXPORT(void, sceGxmSetBackStencilFunc, SceGxmContext *context, SceGxmStencilFunc func, SceGxmStencilOp stencilFail, SceGxmStencilOp depthFail, SceGxmStencilOp depthPass, int32_t compareMask, uint32_t writeMask) {
    TRACY_FUNC(sceGxmSetBackStencilFunc, context, func, stencilFail, depthFail, depthPass, compareMask, writeMask);
    // compareMask and depthMask should be uint8_t, however the compiler optimizes the call if this is the case...
    const uint8_t compare_mask = static_cast<uint8_t>(compareMask);
    const uint8_t write_mask = static_cast<uint8_t>(writeMask);
    if ((context->state.back_stencil.func != func)
        || (context->state.back_stencil.stencil_fail != stencilFail)
        || (context->state.back_stencil.depth_fail != depthFail)
        || (context->state.back_stencil.depth_pass != depthPass)
        || (context->state.back_stencil.compare_mask != compare_mask)
        || (context->state.back_stencil.write_mask != write_mask)) {
        context->state.back_stencil.func = func;
        context->state.back_stencil.stencil_fail = stencilFail;
        context->state.back_stencil.depth_fail = depthFail;
        context->state.back_stencil.depth_pass = depthPass;
        context->state.back_stencil.compare_mask = compare_mask;
        context->state.back_stencil.write_mask = write_mask;

        if (context->alloc_space) {
            renderer::set_stencil_func(*emuenv.renderer, context->renderer.get(), false, func, stencilFail, depthFail, depthPass, compare_mask, write_mask);
        }
    }
}

EXPORT(void, sceGxmSetBackStencilRef, SceGxmContext *context, uint8_t sref) {
    TRACY_FUNC(sceGxmSetBackStencilRef, context, sref);
    if (context->state.back_stencil.ref != sref) {
        context->state.back_stencil.ref = sref;

        if (context->alloc_space)
            renderer::set_stencil_ref(*emuenv.renderer, context->renderer.get(), false, sref);
    }
}

// Not implemented: the arguments are only passed to TRACY_FUNC and the call is reported through
// UNIMPLEMENTED().
EXPORT(void, sceGxmSetBackVisibilityTestEnable, SceGxmContext *context, SceGxmVisibilityTestMode enable) {
    TRACY_FUNC(sceGxmSetBackVisibilityTestEnable, context, enable);
    UNIMPLEMENTED();
}

// Not implemented: the arguments are only passed to TRACY_FUNC and the call is reported through
// UNIMPLEMENTED().
EXPORT(void, sceGxmSetBackVisibilityTestIndex, SceGxmContext *context, uint32_t index) {
    TRACY_FUNC(sceGxmSetBackVisibilityTestIndex, context, index);
    UNIMPLEMENTED();
}

// Not implemented: the arguments are only passed to TRACY_FUNC and the call is reported through
// UNIMPLEMENTED().
EXPORT(void, sceGxmSetBackVisibilityTestOp, SceGxmContext *context, SceGxmVisibilityTestOp op) {
    TRACY_FUNC(sceGxmSetBackVisibilityTestOp, context, op);
    UNIMPLEMENTED();
}

EXPORT(void, sceGxmSetCullMode, SceGxmContext *context, SceGxmCullMode mode) {
    TRACY_FUNC(sceGxmSetCullMode, context, mode);
    if (context->state.cull_mode != mode) {
        context->state.cull_mode = mode;

        if (context->alloc_space)
            renderer::set_cull_mode(*emuenv.renderer, context->renderer.get(), mode);
    }
}

// Smallest non-zero size accepted by sceGxmSetDeferredContextFragmentBuffer and
// sceGxmSetDeferredContextVertexBuffer (presumably in bytes — TODO confirm).
static constexpr std::uint32_t SCE_GXM_DEFERRED_CONTEXT_MINIMUM_BUFFER_SIZE = 1024;

// Assigns the fragment ring buffer of a deferred context.
//
// Returns:
//   SCE_GXM_ERROR_INVALID_POINTER      - deferredContext is null.
//   SCE_GXM_ERROR_INVALID_VALUE        - the context is not a deferred context, the
//                                        size is non-zero but below the minimum, or a
//                                        buffer is given together with a zero size.
//   SCE_GXM_ERROR_WITHIN_COMMAND_LIST  - the context is currently active.
//   0                                  - the buffer and size were stored.
EXPORT(int, sceGxmSetDeferredContextFragmentBuffer, SceGxmContext *deferredContext, Ptr<void> mem, uint32_t size) {
    TRACY_FUNC(sceGxmSetDeferredContextFragmentBuffer, deferredContext, mem, size);
    if (!deferredContext)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    auto &state = deferredContext->state;

    if (state.type != SCE_GXM_CONTEXT_TYPE_DEFERRED)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    if (state.active)
        return RET_ERROR(SCE_GXM_ERROR_WITHIN_COMMAND_LIST);

    if (size == 0) {
        // A buffer pointer is meaningless without a size.
        if (mem)
            return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    } else if (size < SCE_GXM_DEFERRED_CONTEXT_MINIMUM_BUFFER_SIZE) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    // Adopt the caller-provided buffer.
    state.fragment_ring_buffer = mem;
    state.fragment_ring_buffer_size = size;

    return 0;
}

// Assigns the VDM buffer of a deferred context. No size validation is done here.
//
// Returns:
//   SCE_GXM_ERROR_INVALID_POINTER      - deferredContext is null.
//   SCE_GXM_ERROR_INVALID_VALUE        - the context is not a deferred context.
//   SCE_GXM_ERROR_WITHIN_COMMAND_LIST  - the context is currently active.
//   0                                  - the buffer and size were stored.
EXPORT(int, sceGxmSetDeferredContextVdmBuffer, SceGxmContext *deferredContext, Ptr<void> mem, uint32_t size) {
    TRACY_FUNC(sceGxmSetDeferredContextVdmBuffer, deferredContext, mem, size);
    if (!deferredContext)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    auto &state = deferredContext->state;

    if (state.type != SCE_GXM_CONTEXT_TYPE_DEFERRED)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    if (state.active)
        return RET_ERROR(SCE_GXM_ERROR_WITHIN_COMMAND_LIST);

    // Adopt the caller-provided buffer.
    state.vdm_buffer = mem;
    state.vdm_buffer_size = size;

    // Move the allocation cursor to its end marker so that the next call
    // picks up the new vdm buffer.
    deferredContext->alloc_space = deferredContext->alloc_space_end;

    return 0;
}

// Assigns the vertex ring buffer of a deferred context.
//
// Returns:
//   SCE_GXM_ERROR_INVALID_POINTER      - deferredContext is null.
//   SCE_GXM_ERROR_INVALID_VALUE        - the context is not a deferred context, the
//                                        size is non-zero but below the minimum, or a
//                                        buffer is given together with a zero size.
//   SCE_GXM_ERROR_WITHIN_COMMAND_LIST  - the context is currently active.
//   0                                  - the buffer and size were stored.
EXPORT(int, sceGxmSetDeferredContextVertexBuffer, SceGxmContext *deferredContext, Ptr<void> mem, uint32_t size) {
    TRACY_FUNC(sceGxmSetDeferredContextVertexBuffer, deferredContext, mem, size);
    if (!deferredContext)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    auto &state = deferredContext->state;

    if (state.type != SCE_GXM_CONTEXT_TYPE_DEFERRED)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    if (state.active)
        return RET_ERROR(SCE_GXM_ERROR_WITHIN_COMMAND_LIST);

    if (size == 0) {
        // A buffer pointer is meaningless without a size.
        if (mem)
            return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    } else if (size < SCE_GXM_DEFERRED_CONTEXT_MINIMUM_BUFFER_SIZE) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    // Adopt the caller-provided buffer.
    state.vertex_ring_buffer = mem;
    state.vertex_ring_buffer_size = size;

    return 0;
}

// Stores bufferData in the default-uniform-buffer slot of the context's fragment
// uniform buffer table. Returns SCE_GXM_ERROR_INVALID_POINTER when either
// argument is null, 0 otherwise.
EXPORT(int, sceGxmSetFragmentDefaultUniformBuffer, SceGxmContext *context, Ptr<const void> bufferData) {
    TRACY_FUNC(sceGxmSetFragmentDefaultUniformBuffer, context, bufferData);
    const bool arguments_valid = context && bufferData;
    if (!arguments_valid)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    auto &fragment_buffers = context->state.fragment_uniform_buffers;
    fragment_buffers[SCE_GXM_DEFAULT_UNIFORM_BUFFER_CONTAINER_INDEX] = bufferData;
    return 0;
}

// Makes fragmentProgram the context's current fragment program and passes it to
// the renderer (with `true` as the last argument; the vertex-program setter in
// this file passes `false`). Silently does nothing when either argument is null.
EXPORT(void, sceGxmSetFragmentProgram, SceGxmContext *context, Ptr<const SceGxmFragmentProgram> fragmentProgram) {
    TRACY_FUNC(sceGxmSetFragmentProgram, context, fragmentProgram);
    if (context && fragmentProgram) {
        context->state.fragment_program = fragmentProgram;
        renderer::set_program(*emuenv.renderer, context->renderer.get(), fragmentProgram, true);
    }
}

// Copies *texture into fragment texture slot textureIndex of the context and
// marks that slot dirty.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null context/texture,
// SCE_GXM_ERROR_INVALID_VALUE when textureIndex is outside the
// SCE_GXM_MAX_TEXTURE_UNITS range, and 0 on success.
EXPORT(int, sceGxmSetFragmentTexture, SceGxmContext *context, uint32_t textureIndex, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmSetFragmentTexture, context, textureIndex, texture);
    if (!context || !texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (textureIndex >= SCE_GXM_MAX_TEXTURE_UNITS)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    // Fragment textures occupy the first SCE_GXM_MAX_TEXTURE_UNITS entries of the table.
    context->state.textures[textureIndex] = *texture;
    context->is_frag_texture_dirty[textureIndex] = true;

    return 0;
}

// Stores bufferData in the context's fragment uniform buffer table at slot
// bufferIndex + SCE_GXM_UNIFORM_BUFFER_OFFSET.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null context/bufferData,
// SCE_GXM_ERROR_INVALID_VALUE when bufferIndex is outside the
// SCE_GXM_MAX_UNIFORM_BUFFERS range, and 0 on success.
EXPORT(int, sceGxmSetFragmentUniformBuffer, SceGxmContext *context, uint32_t bufferIndex, Ptr<const void> bufferData) {
    TRACY_FUNC(sceGxmSetFragmentUniformBuffer, context, bufferIndex, bufferData);
    if (!context || !bufferData)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    if (bufferIndex >= SCE_GXM_MAX_UNIFORM_BUFFERS)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    const auto slot = bufferIndex + SCE_GXM_UNIFORM_BUFFER_OFFSET;
    context->state.fragment_uniform_buffers[slot] = bufferData;
    return 0;
}

EXPORT(void, sceGxmSetFrontDepthBias, SceGxmContext *context, int32_t factor, int32_t units) {
    TRACY_FUNC(sceGxmSetFrontDepthBias, context, factor, units);
    if ((context->state.front_depth_bias_factor != factor) || (context->state.front_depth_bias_units != units)) {
        context->state.front_depth_bias_factor = factor;
        context->state.front_depth_bias_units = units;

        if (context->alloc_space)
            renderer::set_depth_bias(*emuenv.renderer, context->renderer.get(), true, factor, units);
    }
}

EXPORT(void, sceGxmSetFrontDepthFunc, SceGxmContext *context, SceGxmDepthFunc depthFunc) {
    TRACY_FUNC(sceGxmSetFrontDepthFunc, context, depthFunc);
    if (context->state.front_depth_func != depthFunc) {
        context->state.front_depth_func = depthFunc;

        if (context->alloc_space) {
            renderer::set_depth_func(*emuenv.renderer, context->renderer.get(), true, depthFunc);
        }
    }
}

EXPORT(void, sceGxmSetFrontDepthWriteEnable, SceGxmContext *context, SceGxmDepthWriteMode enable) {
    TRACY_FUNC(sceGxmSetFrontDepthWriteEnable, context, enable);
    if (context->state.front_depth_write_enable != enable) {
        context->state.front_depth_write_enable = enable;

        if (context->alloc_space) {
            renderer::set_depth_write_enable_mode(*emuenv.renderer, context->renderer.get(), true, enable);
        }
    }
}

// Forwards the front-face fragment program mode straight to the renderer. Unlike
// most setters in this file, nothing is cached in the context state and
// context->alloc_space is not consulted.
EXPORT(void, sceGxmSetFrontFragmentProgramEnable, SceGxmContext *context, SceGxmFragmentProgramMode enable) {
    TRACY_FUNC(sceGxmSetFrontFragmentProgramEnable, context, enable);
    auto *const renderer_context = context->renderer.get();
    renderer::set_side_fragment_program_enable(*emuenv.renderer, renderer_context, true, enable);
}

// Stub: the front-face "line fill last pixel" mode is not stored anywhere. The
// arguments are only handed to TRACY_FUNC and the body invokes UNIMPLEMENTED().
EXPORT(void, sceGxmSetFrontLineFillLastPixelEnable, SceGxmContext *context, SceGxmLineFillLastPixelMode enable) {
    TRACY_FUNC(sceGxmSetFrontLineFillLastPixelEnable, context, enable);
    UNIMPLEMENTED();
}

EXPORT(void, sceGxmSetFrontPointLineWidth, SceGxmContext *context, uint32_t width) {
    TRACY_FUNC(sceGxmSetFrontPointLineWidth, context, width);
    if (context->state.front_point_line_width != width) {
        context->state.front_point_line_width = width;

        if (context->alloc_space) {
            renderer::set_point_line_width(*emuenv.renderer, context->renderer.get(), true, width);
        }
    }
}

EXPORT(void, sceGxmSetFrontPolygonMode, SceGxmContext *context, SceGxmPolygonMode mode) {
    TRACY_FUNC(sceGxmSetFrontPolygonMode, context, mode);
    if (context->state.front_polygon_mode != mode) {
        context->state.front_polygon_mode = mode;

        if (context->alloc_space) {
            renderer::set_polygon_mode(*emuenv.renderer, context->renderer.get(), true, mode);
        }
    }
}

EXPORT(void, sceGxmSetFrontStencilFunc, SceGxmContext *context, SceGxmStencilFunc func, SceGxmStencilOp stencilFail, SceGxmStencilOp depthFail, SceGxmStencilOp depthPass, int32_t compareMask, uint32_t writeMask) {
    TRACY_FUNC(sceGxmSetFrontStencilFunc, context, func, stencilFail, depthFail, depthPass, compareMask, writeMask);
    // compareMask and depthMask should be uint8_t, however the compiler optimizes the call if this is the case...
    const uint8_t compare_mask = static_cast<uint8_t>(compareMask);
    const uint8_t write_mask = static_cast<uint8_t>(writeMask);
    if ((context->state.front_stencil.func != func)
        || (context->state.front_stencil.stencil_fail != stencilFail)
        || (context->state.front_stencil.depth_fail != depthFail)
        || (context->state.front_stencil.depth_pass != depthPass)
        || (context->state.front_stencil.compare_mask != compare_mask)
        || (context->state.front_stencil.write_mask != write_mask)) {
        context->state.front_stencil.func = func;
        context->state.front_stencil.depth_fail = depthFail;
        context->state.front_stencil.depth_pass = depthPass;
        context->state.front_stencil.stencil_fail = stencilFail;
        context->state.front_stencil.compare_mask = compare_mask;
        context->state.front_stencil.write_mask = write_mask;

        if (context->alloc_space)
            renderer::set_stencil_func(*emuenv.renderer, context->renderer.get(), true, func, stencilFail, depthFail, depthPass, compare_mask, write_mask);
    }
}

EXPORT(void, sceGxmSetFrontStencilRef, SceGxmContext *context, uint8_t sref) {
    TRACY_FUNC(sceGxmSetFrontStencilRef, context, sref);
    if (context->state.front_stencil.ref != sref) {
        context->state.front_stencil.ref = sref;
        if (context->alloc_space) {
            renderer::set_stencil_ref(*emuenv.renderer, context->renderer.get(), true, sref);
        }
    }
}

// Enables or disables the visibility test. When the renderer reports
// support_memory_mapping, the flag is stored (anything other than
// SCE_GXM_VISIBILITY_TEST_DISABLED counts as enabled) and the complete
// visibility state is re-sent to the renderer; otherwise the call is reported
// through UNIMPLEMENTED() and nothing is stored.
EXPORT(void, sceGxmSetFrontVisibilityTestEnable, SceGxmContext *context, SceGxmVisibilityTestMode enable) {
    TRACY_FUNC(sceGxmSetFrontVisibilityTestEnable, context, enable);

    if (emuenv.renderer->features.support_memory_mapping) {
        auto &state = context->state;
        state.visibility_enable = (enable != SCE_GXM_VISIBILITY_TEST_DISABLED);
        renderer::set_visibility_index(*emuenv.renderer, context->renderer.get(), state.visibility_enable, state.visibility_index, state.visibility_is_increment);
    } else {
        UNIMPLEMENTED();
    }
}

// Selects the visibility test index. When the renderer reports
// support_memory_mapping, the index is stored and the complete visibility state
// is re-sent to the renderer; otherwise the call is reported through
// UNIMPLEMENTED() and nothing is stored.
EXPORT(void, sceGxmSetFrontVisibilityTestIndex, SceGxmContext *context, uint32_t index) {
    TRACY_FUNC(sceGxmSetFrontVisibilityTestIndex, context, index);

    if (emuenv.renderer->features.support_memory_mapping) {
        auto &state = context->state;
        state.visibility_index = index;
        renderer::set_visibility_index(*emuenv.renderer, context->renderer.get(), state.visibility_enable, state.visibility_index, state.visibility_is_increment);
    } else {
        UNIMPLEMENTED();
    }
}

// Selects the visibility test operation. When the renderer reports
// support_memory_mapping, the state remembers whether op is
// SCE_GXM_VISIBILITY_TEST_OP_INCREMENT and the complete visibility state is
// re-sent to the renderer; otherwise the call is reported through
// UNIMPLEMENTED() and nothing is stored.
EXPORT(void, sceGxmSetFrontVisibilityTestOp, SceGxmContext *context, SceGxmVisibilityTestOp op) {
    TRACY_FUNC(sceGxmSetFrontVisibilityTestOp, context, op);

    if (emuenv.renderer->features.support_memory_mapping) {
        auto &state = context->state;
        state.visibility_is_increment = (op == SCE_GXM_VISIBILITY_TEST_OP_INCREMENT);
        renderer::set_visibility_index(*emuenv.renderer, context->renderer.get(), state.visibility_enable, state.visibility_index, state.visibility_is_increment);
    } else {
        UNIMPLEMENTED();
    }
}

// Installs a precomputed fragment state on the context, or clears the current
// one (via reset()) when a null pointer is passed.
EXPORT(void, sceGxmSetPrecomputedFragmentState, SceGxmContext *context, Ptr<SceGxmPrecomputedFragmentState> state) {
    TRACY_FUNC(sceGxmSetPrecomputedFragmentState, context, state);
    auto &precomputed = context->state.precomputed_fragment_state;

    if (state) {
        precomputed = state;
    } else {
        precomputed.reset();
    }
}

// Installs a precomputed vertex state on the context, or clears the current one
// (via reset()) when a null pointer is passed.
EXPORT(void, sceGxmSetPrecomputedVertexState, SceGxmContext *context, Ptr<SceGxmPrecomputedVertexState> state) {
    TRACY_FUNC(sceGxmSetPrecomputedVertexState, context, state);
    auto &precomputed = context->state.precomputed_vertex_state;

    if (state) {
        precomputed = state;
    } else {
        precomputed.reset();
    }
}

// Stores the region clip mode and rectangle in the context state. The renderer
// is only notified when the mode or any corner changed and context->alloc_space
// is set.
EXPORT(void, sceGxmSetRegionClip, SceGxmContext *context, SceGxmRegionClipMode mode, uint32_t xMin, uint32_t yMin, uint32_t xMax, uint32_t yMax) {
    TRACY_FUNC(sceGxmSetRegionClip, context, mode, xMin, yMin, xMax, yMax);
    auto &state = context->state;

    const bool mode_changed = (state.region_clip_mode != mode);
    const bool rect_changed = (state.region_clip_min.x != xMin)
        || (state.region_clip_min.y != yMin)
        || (state.region_clip_max.x != xMax)
        || (state.region_clip_max.y != yMax);

    if (mode_changed)
        state.region_clip_mode = mode;

    if (rect_changed) {
        state.region_clip_min.x = xMin;
        state.region_clip_min.y = yMin;
        state.region_clip_max.x = xMax;
        state.region_clip_max.y = yMax;
    }

    const bool anything_changed = mode_changed || rect_changed;
    if (anything_changed && context->alloc_space) {
        // Note the argument order: the renderer call receives both x bounds first, then both y bounds.
        renderer::set_region_clip(*emuenv.renderer, context->renderer.get(), mode, xMin, xMax, yMin, yMax);
    }
}

EXPORT(void, sceGxmSetTwoSidedEnable, SceGxmContext *context, SceGxmTwoSidedMode mode) {
    TRACY_FUNC(sceGxmSetTwoSidedEnable, context, mode);
    if (context->state.two_sided != mode) {
        context->state.two_sided = mode;

        if (context->alloc_space) {
            renderer::set_two_sided_enable(*emuenv.renderer, context->renderer.get(), mode);
        }
    }
}

// Converts componentCount floats from sourceData to T (plain static_cast, so the
// fractional part is dropped for integer T) and stores the results tightly
// packed in converted_data, which is resized to componentCount * sizeof(T) bytes.
template <typename T>
static void convert_uniform_data(std::vector<std::uint8_t> &converted_data, const float *sourceData, uint32_t componentCount) {
    converted_data.resize(componentCount * sizeof(T));

    std::uint8_t *out = converted_data.data();
    const float *const source_end = sourceData + componentCount;
    for (const float *in = sourceData; in != source_end; ++in, out += sizeof(T)) {
        const T value = static_cast<T>(*in);
        // memcpy keeps the write valid regardless of the byte buffer's alignment.
        std::memcpy(out, &value, sizeof(T));
    }
}

EXPORT(int, sceGxmSetUniformDataF, void *uniformBuffer, const SceGxmProgramParameter *parameter, uint32_t componentOffset, uint32_t componentCount, const float *sourceData) {
    TRACY_FUNC(sceGxmSetUniformDataF, uniformBuffer, parameter, componentOffset, componentCount, sourceData);
    assert(parameter);

    if (!uniformBuffer || !parameter || !sourceData)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    if (parameter->category != SceGxmParameterCategory::SCE_GXM_PARAMETER_CATEGORY_UNIFORM)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    size_t size = 0;
    size_t offset = 0;
    bool is_float = false;

    const std::uint16_t param_type = parameter->type;

    // Component size is in bytes
    int comp_size = gxp::get_parameter_type_size(static_cast<SceGxmParameterType>(param_type));
    const std::uint8_t *source = reinterpret_cast<const std::uint8_t *>(sourceData);
    std::vector<std::uint8_t> converted_data;

    switch (parameter->type) {
    case SCE_GXM_PARAMETER_TYPE_S8: {
        convert_uniform_data<int8_t>(converted_data, sourceData, componentCount);
        source = converted_data.data();
        break;
    }

    case SCE_GXM_PARAMETER_TYPE_U8: {
        convert_uniform_data<uint8_t>(converted_data, sourceData, componentCount);
        source = converted_data.data();
        break;
    }

    case SCE_GXM_PARAMETER_TYPE_U16: {
        convert_uniform_data<uint16_t>(converted_data, sourceData, componentCount);
        source = converted_data.data();
        break;
    }

    case SCE_GXM_PARAMETER_TYPE_S16: {
        convert_uniform_data<int16_t>(converted_data, sourceData, componentCount);
        source = converted_data.data();
        break;
    }

    case SCE_GXM_PARAMETER_TYPE_U32: {
        convert_uniform_data<uint32_t>(converted_data, sourceData, componentCount);
        source = converted_data.data();
        break;
    }

    case SCE_GXM_PARAMETER_TYPE_S32: {
        convert_uniform_data<int32_t>(converted_data, sourceData, componentCount);
        source = converted_data.data();
        break;
    }

    case SCE_GXM_PARAMETER_TYPE_F16: {
        converted_data.resize(((componentCount + 7) / 8) * 8 * 2);
        float_to_half(sourceData, reinterpret_cast<std::uint16_t *>(converted_data.data()), componentCount);

        source = converted_data.data();
        is_float = true;
        break;
    }

    case SCE_GXM_PARAMETER_TYPE_F32: {
        is_float = true;
        break;
    }
    default:
        assert(false);
    }

    if (parameter->array_size == 1 || parameter->component_count == 1) {
        // Case 1: No array. Only a single vector. Don't apply any alignment
        // Case 2: Array but component count equals to 1. This case, a scalar array, align it to 32-bit bound
        if (parameter->component_count == 1) {
            // Apply 32 bit alignment, by making each component has 4 bytes
            comp_size = 4;
        }

        size = componentCount * comp_size;
        offset = parameter->resource_index * sizeof(float) + componentOffset * comp_size;

        memcpy(static_cast<uint8_t *>(uniformBuffer) + offset, source, size);
    } else {
        // This is the size of each element.
        size = parameter->component_count * comp_size;
        int align_bytes = 0;

        if (is_float) {
            // Align it to 64-bit boundary (8 bytes)
            if ((size & 7) != 0) {
                align_bytes = 8 - (size & 7);
            }
        } else {
            // Align it to 32-bit boundary (4 bytes)
            if ((size & 3) != 0) {
                align_bytes = 4 - (size & 3);
            }
        }

        // wtf
        // wtf
        const int vec_to_start_write = componentOffset / parameter->component_count;
        int component_cursor_inside_vector = (componentOffset % parameter->component_count);
        std::uint8_t *dest = static_cast<uint8_t *>(uniformBuffer) + parameter->resource_index * sizeof(float)
            + vec_to_start_write * (size + align_bytes) + component_cursor_inside_vector * comp_size;

        int component_to_copy_remain_per_elem = parameter->component_count - component_cursor_inside_vector;
        int component_left_to_copy = componentCount;

        while (component_left_to_copy > 0) {
            memcpy(dest, source, component_to_copy_remain_per_elem * comp_size);

            // Add and align destination
            dest += comp_size * component_to_copy_remain_per_elem + align_bytes;
            source += component_to_copy_remain_per_elem * comp_size;

            component_left_to_copy -= component_to_copy_remain_per_elem;
            component_to_copy_remain_per_elem = std::min<int>(4, component_to_copy_remain_per_elem);
        }
    }

    return 0;
}

// Stub: takes no arguments and simply returns whatever UNIMPLEMENTED() yields.
EXPORT(int, sceGxmSetUserMarker) {
    TRACY_FUNC(sceGxmSetUserMarker);
    return UNIMPLEMENTED();
}

// Stub: takes no arguments and simply returns whatever UNIMPLEMENTED() yields.
EXPORT(int, sceGxmSetValidationEnable) {
    TRACY_FUNC(sceGxmSetValidationEnable);
    return UNIMPLEMENTED();
}

// Stores bufferData in the default-uniform-buffer slot of the context's vertex
// uniform buffer table. Returns SCE_GXM_ERROR_INVALID_POINTER when either
// argument is null, 0 otherwise.
EXPORT(int, sceGxmSetVertexDefaultUniformBuffer, SceGxmContext *context, Ptr<const void> bufferData) {
    TRACY_FUNC(sceGxmSetVertexDefaultUniformBuffer, context, bufferData);
    const bool arguments_valid = context && bufferData;
    if (!arguments_valid)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    auto &vertex_buffers = context->state.vertex_uniform_buffers;
    vertex_buffers[SCE_GXM_DEFAULT_UNIFORM_BUFFER_CONTAINER_INDEX] = bufferData;
    return 0;
}

// Makes vertexProgram the context's current vertex program and passes it to the
// renderer (with `false` as the last argument; the fragment-program setter in
// this file passes `true`). Silently does nothing when either argument is null.
EXPORT(void, sceGxmSetVertexProgram, SceGxmContext *context, Ptr<const SceGxmVertexProgram> vertexProgram) {
    TRACY_FUNC(sceGxmSetVertexProgram, context, vertexProgram);
    if (context && vertexProgram) {
        context->state.vertex_program = vertexProgram;
        renderer::set_program(*emuenv.renderer, context->renderer.get(), vertexProgram, false);
    }
}

// Binds streamData to vertex stream slot streamIndex of the context.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null context/streamData,
// SCE_GXM_ERROR_INVALID_VALUE when streamIndex is outside the
// SCE_GXM_MAX_VERTEX_STREAMS range, and 0 on success.
EXPORT(int, sceGxmSetVertexStream, SceGxmContext *context, uint32_t streamIndex, Ptr<const void> streamData) {
    TRACY_FUNC(sceGxmSetVertexStream, context, streamIndex, streamData);
    if (!context || !streamData) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (streamIndex >= SCE_GXM_MAX_VERTEX_STREAMS) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    auto &streams = context->state.stream_data;
    streams[streamIndex] = streamData;
    return 0;
}

// Copies *texture into vertex texture slot textureIndex of the context and marks
// that slot dirty. Vertex textures live in the shared texture table after the
// first SCE_GXM_MAX_TEXTURE_UNITS entries.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null context/texture,
// SCE_GXM_ERROR_INVALID_VALUE when textureIndex is outside the
// SCE_GXM_MAX_TEXTURE_UNITS range, and 0 on success.
EXPORT(int, sceGxmSetVertexTexture, SceGxmContext *context, uint32_t textureIndex, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmSetVertexTexture, context, textureIndex, texture);
    if (!context || !texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (textureIndex >= SCE_GXM_MAX_TEXTURE_UNITS)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    const auto table_slot = textureIndex + SCE_GXM_MAX_TEXTURE_UNITS;
    context->state.textures[table_slot] = *texture;
    context->is_vert_texture_dirty[textureIndex] = true;

    return 0;
}

// Underscore-prefixed alias: delegates to sceGxmSetVertexTexture with the same
// arguments and returns its result unchanged.
EXPORT(int, _sceGxmSetVertexTexture, SceGxmContext *context, uint32_t textureIndex, const SceGxmTexture *texture) {
    TRACY_FUNC(_sceGxmSetVertexTexture, context, textureIndex, texture);
    const int result = CALL_EXPORT(sceGxmSetVertexTexture, context, textureIndex, texture);
    return result;
}

// Stores bufferData in the context's vertex uniform buffer table at slot
// bufferIndex + SCE_GXM_UNIFORM_BUFFER_OFFSET.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null context/bufferData,
// SCE_GXM_ERROR_INVALID_VALUE when bufferIndex is outside the
// SCE_GXM_MAX_UNIFORM_BUFFERS range, and 0 on success.
EXPORT(int, sceGxmSetVertexUniformBuffer, SceGxmContext *context, uint32_t bufferIndex, Ptr<const void> bufferData) {
    TRACY_FUNC(sceGxmSetVertexUniformBuffer, context, bufferIndex, bufferData);
    if (!context || !bufferData) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (bufferIndex >= SCE_GXM_MAX_UNIFORM_BUFFERS) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    const auto slot = bufferIndex + SCE_GXM_UNIFORM_BUFFER_OFFSET;
    context->state.vertex_uniform_buffers[slot] = bufferData;
    return 0;
}

// Stores the viewport offset and scale in the context state. When any of the six
// values changed, the new values are always recorded; update_viewport() is then
// called only if the context is active and context->alloc_space is set. A
// one-time warning is logged when the context is not active.
EXPORT(void, sceGxmSetViewport, SceGxmContext *context, float xOffset, float xScale, float yOffset, float yScale, float zOffset, float zScale) {
    TRACY_FUNC(sceGxmSetViewport, context, xOffset, xScale, yOffset, yScale, zOffset, zScale);
    auto &viewport = context->state.viewport;

    const bool offset_unchanged = (viewport.offset.x == xOffset) && (viewport.offset.y == yOffset) && (viewport.offset.z == zOffset);
    const bool scale_unchanged = (viewport.scale.x == xScale) && (viewport.scale.y == yScale) && (viewport.scale.z == zScale);
    if (offset_unchanged && scale_unchanged)
        return;

    viewport.offset.x = xOffset;
    viewport.offset.y = yOffset;
    viewport.offset.z = zOffset;
    viewport.scale.x = xScale;
    viewport.scale.y = yScale;
    viewport.scale.z = zScale;

    if (!context->state.active) {
        LOG_WARN_ONCE("The call was made outside of the Scene. It will be ignored.");
        return;
    }

    if (context->alloc_space)
        update_viewport(*emuenv.renderer, context);
}

// Stores the viewport enable mode in the context state (offset and scale are
// left untouched). When the mode changed, update_viewport() is called only if
// the context is active and context->alloc_space is set. A one-time warning is
// logged when the context is not active.
EXPORT(void, sceGxmSetViewportEnable, SceGxmContext *context, SceGxmViewportMode enable) {
    TRACY_FUNC(sceGxmSetViewportEnable, context, enable);
    auto &viewport = context->state.viewport;

    if (viewport.enable == enable)
        return;

    viewport.enable = enable;

    if (!context->state.active) {
        LOG_WARN_ONCE("The call was made outside of the Scene. It will be applied when the next Scene is called.");
        return;
    }

    if (context->alloc_space)
        update_viewport(*emuenv.renderer, context);
}

// Sets the visibility buffer of an immediate context.
//
// With renderer memory mapping support the buffer is handed to the renderer;
// without it the call is stubbed by filling SCE_GXM_GPU_CORE_COUNT * stridePerCore
// bytes of the buffer with 0xFF.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null context,
// SCE_GXM_ERROR_INVALID_VALUE when the context is not an immediate context,
// SCE_GXM_ERROR_INVALID_ALIGNMENT when bufferBase is not aligned to
// SCE_GXM_VISIBILITY_ALIGNMENT, and 0 on success.
EXPORT(int, sceGxmSetVisibilityBuffer, SceGxmContext *immediateContext, Ptr<void> bufferBase, uint32_t stridePerCore) {
    TRACY_FUNC(sceGxmSetVisibilityBuffer, immediateContext, bufferBase, stridePerCore);
    if (!immediateContext)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    if (immediateContext->state.type != SCE_GXM_CONTEXT_TYPE_IMMEDIATE) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    const auto misaligned_bits = bufferBase.address() & (SCE_GXM_VISIBILITY_ALIGNMENT - 1);
    if (misaligned_bits) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_ALIGNMENT);
    }

    if (!emuenv.renderer->features.support_memory_mapping) {
        STUBBED("Set all visible");
        memset(bufferBase.get(emuenv.mem), 0xFF, SCE_GXM_GPU_CORE_COUNT * stridePerCore);
        return 0;
    }

    renderer::set_visibility_buffer(*emuenv.renderer, immediateContext->renderer.get(), bufferBase.cast<uint32_t>(), stridePerCore);
    return 0;
}

// Stub: declared without arguments; the body only invokes UNIMPLEMENTED().
EXPORT(void, sceGxmSetWBufferEnable) {
    TRACY_FUNC(sceGxmSetWBufferEnable);
    UNIMPLEMENTED();
}

// Stub: declared without arguments; the body only invokes UNIMPLEMENTED().
EXPORT(void, sceGxmSetWClampEnable) {
    TRACY_FUNC(sceGxmSetWClampEnable);
    UNIMPLEMENTED();
}

// Stub: clampValue is not stored anywhere. The arguments are only handed to
// TRACY_FUNC and the body invokes UNIMPLEMENTED().
EXPORT(void, sceGxmSetWClampValue, SceGxmContext *context, float clampValue) {
    TRACY_FUNC(sceGxmSetWClampValue, context, clampValue);
    UNIMPLEMENTED();
}

// Stub: takes no arguments and simply returns whatever UNIMPLEMENTED() yields.
EXPORT(int, sceGxmSetWarningEnabled) {
    TRACY_FUNC(sceGxmSetWarningEnabled);
    return UNIMPLEMENTED();
}

// Stub: takes no arguments and simply returns whatever UNIMPLEMENTED() yields.
EXPORT(int, sceGxmSetYuvProfile) {
    TRACY_FUNC(sceGxmSetYuvProfile);
    return UNIMPLEMENTED();
}

// Requests `size` bytes from the shader patcher's hostAllocCallback by running
// it on the given thread with (userData, size) as arguments.
//
// Returns the address produced by the callback, or 0 when no callback is
// registered. Callers in this file treat a null result as an allocation
// failure.
static Address alloc_callbacked(EmuEnvState &emuenv, SceUID thread_id, const SceGxmShaderPatcherParams &shaderPatcherParams, unsigned int size) {
    if (!shaderPatcherParams.hostAllocCallback) {
        LOG_ERROR("Empty hostAllocCallback");
        // There is nothing to call: running a callback at a null address cannot
        // succeed, so report the failure to the caller instead.
        return 0;
    }
    const auto thread = emuenv.kernel.get_thread(thread_id);
    auto result = thread->run_callback(shaderPatcherParams.hostAllocCallback.address(), { shaderPatcherParams.userData.address(), size });
    return result;
}

// Allocates sizeof(T) bytes through the shader patcher's alloc callback and
// default-constructs a T in the returned memory. Returns a null Ptr<T> (without
// constructing anything) when the allocation yields a null address.
template <typename T>
static Ptr<T> alloc_callbacked(EmuEnvState &emuenv, SceUID thread_id, const SceGxmShaderPatcherParams &shaderPatcherParams) {
    const Ptr<T> allocated(alloc_callbacked(emuenv, thread_id, shaderPatcherParams, sizeof(T)));
    if (allocated) {
        // Placement-new: construct the object directly in the memory handed back by the callback.
        new (allocated.get(emuenv.mem)) T;
    }
    return allocated;
}

// Convenience overload: allocates and default-constructs a T using the callback
// parameters stored in an existing shader patcher.
template <typename T>
static Ptr<T> alloc_callbacked(EmuEnvState &emuenv, SceUID thread_id, SceGxmShaderPatcher *shaderPatcher) {
    const SceGxmShaderPatcherParams &patcher_params = shaderPatcher->params;
    return alloc_callbacked<T>(emuenv, thread_id, patcher_params);
}

// Releases `data` through the shader patcher's hostFreeCallback by running it on
// the given thread with (userData, data) as arguments. When no callback is
// registered the error is logged and nothing is released.
static void free_callbacked(EmuEnvState &emuenv, SceUID thread_id, SceGxmShaderPatcher *shaderPatcher, Address data) {
    if (!shaderPatcher->params.hostFreeCallback) {
        LOG_ERROR("Empty hostFreeCallback");
        // There is nothing to call: running a callback at a null address cannot
        // succeed.
        return;
    }
    const auto thread = emuenv.kernel.get_thread(thread_id);
    thread->run_callback(shaderPatcher->params.hostFreeCallback.address(), { shaderPatcher->params.userData.address(), data });
}

// Typed convenience overload: releases the memory a Ptr<T> points at by
// forwarding its raw address to the Address-based free_callbacked.
template <typename T>
static void free_callbacked(EmuEnvState &emuenv, SceUID thread_id, SceGxmShaderPatcher *shaderPatcher, Ptr<T> data) {
    const Address raw_address = data.address();
    free_callbacked(emuenv, thread_id, shaderPatcher, raw_address);
}

// Increments the reference count of a fragment program. Returns
// SCE_GXM_ERROR_INVALID_POINTER when either argument is null, 0 otherwise.
// shaderPatcher is only null-checked here.
EXPORT(int, sceGxmShaderPatcherAddRefFragmentProgram, SceGxmShaderPatcher *shaderPatcher, SceGxmFragmentProgram *fragmentProgram) {
    TRACY_FUNC(sceGxmShaderPatcherAddRefFragmentProgram, shaderPatcher, fragmentProgram);
    const bool arguments_valid = shaderPatcher && fragmentProgram;
    if (!arguments_valid) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    fragmentProgram->reference_count++;
    return 0;
}

// Increments the reference count of a vertex program. Returns
// SCE_GXM_ERROR_INVALID_POINTER when either argument is null, 0 otherwise.
// shaderPatcher is only null-checked here.
EXPORT(int, sceGxmShaderPatcherAddRefVertexProgram, SceGxmShaderPatcher *shaderPatcher, SceGxmVertexProgram *vertexProgram) {
    TRACY_FUNC(sceGxmShaderPatcherAddRefVertexProgram, shaderPatcher, vertexProgram);
    const bool arguments_valid = shaderPatcher && vertexProgram;
    if (!arguments_valid) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    vertexProgram->reference_count++;
    return 0;
}

EXPORT(int, sceGxmShaderPatcherCreate, const SceGxmShaderPatcherParams *params, Ptr<SceGxmShaderPatcher> *shaderPatcher) {
    TRACY_FUNC(sceGxmShaderPatcherCreate, params, shaderPatcher);
    if (!params || !shaderPatcher)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    *shaderPatcher = alloc_callbacked<SceGxmShaderPatcher>(emuenv, thread_id, *params);
    assert(*shaderPatcher);
    if (!*shaderPatcher) {
        return RET_ERROR(SCE_GXM_ERROR_OUT_OF_MEMORY);
    }
    shaderPatcher->get(emuenv.mem)->params = *params;
    return 0;
}

EXPORT(int, sceGxmShaderPatcherCreateFragmentProgram, SceGxmShaderPatcher *shaderPatcher, const SceGxmRegisteredProgram *programId, SceGxmOutputRegisterFormat outputFormat, SceGxmMultisampleMode multisampleMode, const SceGxmBlendInfo *blendInfo, Ptr<const SceGxmProgram> vertexProgram, Ptr<SceGxmFragmentProgram> *fragmentProgram) {
    TRACY_FUNC(sceGxmShaderPatcherCreateFragmentProgram, shaderPatcher, programId, outputFormat, multisampleMode, blendInfo, vertexProgram, fragmentProgram);
    MemState &mem = emuenv.mem;

    if (!shaderPatcher || !programId || !fragmentProgram)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    static const SceGxmBlendInfo default_blend_info = {
        SCE_GXM_COLOR_MASK_ALL,
        SCE_GXM_BLEND_FUNC_NONE,
        SCE_GXM_BLEND_FUNC_NONE,
        SCE_GXM_BLEND_FACTOR_ONE,
        SCE_GXM_BLEND_FACTOR_ZERO,
        SCE_GXM_BLEND_FACTOR_ONE,
        SCE_GXM_BLEND_FACTOR_ZERO
    };
    const FragmentProgramCacheKey key = {
        *programId,
        (blendInfo != nullptr) ? *blendInfo : default_blend_info
    };
    FragmentProgramCache::const_iterator cached = shaderPatcher->fragment_program_cache.find(key);
    if (cached != shaderPatcher->fragment_program_cache.end()) {
        ++cached->second.get(mem)->reference_count;
        *fragmentProgram = cached->second;
        return 0;
    }

    *fragmentProgram = alloc_callbacked<SceGxmFragmentProgram>(emuenv, thread_id, shaderPatcher);
    assert(*fragmentProgram);
    if (!*fragmentProgram) {
        return RET_ERROR(SCE_GXM_ERROR_OUT_OF_MEMORY);
    }

    SceGxmFragmentProgram *const fp = fragmentProgram->get(mem);
    fp->is_maskupdate = false;
    fp->program = programId->program;

    if (!renderer::create(fp->renderer_data, *emuenv.renderer, *programId->program.get(mem), blendInfo, emuenv.renderer->gxp_ptr_map)) {
        return RET_ERROR(SCE_GXM_ERROR_DRIVER);
    }

    shaderPatcher->fragment_program_cache.emplace(key, *fragmentProgram);

    return 0;
}

// Creates a "mask update" fragment program.
//
// A fragment program object and a buffer of size_mask_gxp bytes are allocated
// through the patcher's callbacks, the built-in mask_gxp program is copied into
// that buffer, and the renderer data is created for it. Programs created here
// are not added to the fragment program cache.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER for null arguments,
// SCE_GXM_ERROR_OUT_OF_MEMORY when either allocation fails,
// SCE_GXM_ERROR_DRIVER when renderer::create fails, and 0 on success.
EXPORT(int, sceGxmShaderPatcherCreateMaskUpdateFragmentProgram, SceGxmShaderPatcher *shaderPatcher, Ptr<SceGxmFragmentProgram> *fragmentProgram) {
    TRACY_FUNC(sceGxmShaderPatcherCreateMaskUpdateFragmentProgram, shaderPatcher, fragmentProgram);
    MemState &mem = emuenv.mem;

    if (!shaderPatcher || !fragmentProgram)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    *fragmentProgram = alloc_callbacked<SceGxmFragmentProgram>(emuenv, thread_id, shaderPatcher);
    assert(*fragmentProgram);
    if (!*fragmentProgram) {
        return RET_ERROR(SCE_GXM_ERROR_OUT_OF_MEMORY);
    }

    SceGxmFragmentProgram *const fp = fragmentProgram->get(mem);
    fp->is_maskupdate = true;
    fp->program = Ptr<const SceGxmProgram>(alloc_callbacked(emuenv, thread_id, shaderPatcher->params, size_mask_gxp));
    if (!fp->program) {
        // The second allocation can fail as well; copying the mask program through
        // a null pointer must be avoided. As on the driver-error path below, the
        // already allocated fragment program object is left to the caller.
        return RET_ERROR(SCE_GXM_ERROR_OUT_OF_MEMORY);
    }
    // The program pointer is const-qualified for consumers; this is the one place
    // where its freshly allocated storage is filled in.
    memcpy(const_cast<SceGxmProgram *>(fp->program.get(mem)), mask_gxp, size_mask_gxp);

    if (!renderer::create(fp->renderer_data, *emuenv.renderer, *fp->program.get(mem), nullptr, emuenv.renderer->gxp_ptr_map)) {
        return RET_ERROR(SCE_GXM_ERROR_DRIVER);
    }

    return 0;
}

// Creates (or reuses) a vertex program for a registered program.
//
// Programs are cached per shader patcher under a key made of *programId and a
// hash of the attribute and stream arrays (the two hashes are XOR-combined; a
// missing array contributes nothing). On a cache hit the cached program's
// reference count is incremented and it is returned. Otherwise a new program is
// allocated through the patcher's callbacks, the streams and attributes are
// copied into it, its renderer data is created and it is added to the cache.
//
// Returns SCE_GXM_ERROR_INVALID_POINTER for null shaderPatcher/programId/
// vertexProgram, SCE_GXM_ERROR_OUT_OF_MEMORY when the allocation fails,
// SCE_GXM_ERROR_DRIVER when renderer::create fails, and 0 on success.
EXPORT(int, sceGxmShaderPatcherCreateVertexProgram, SceGxmShaderPatcher *shaderPatcher, const SceGxmRegisteredProgram *programId, const SceGxmVertexAttribute *attributes, uint32_t attributeCount, const SceGxmVertexStream *streams, uint32_t streamCount, Ptr<SceGxmVertexProgram> *vertexProgram) {
    TRACY_FUNC(sceGxmShaderPatcherCreateVertexProgram, shaderPatcher, programId, attributes, attributeCount, streams, streamCount, vertexProgram);

    if (!shaderPatcher || !programId || !vertexProgram) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    MemState &mem = emuenv.mem;

    VertexProgramCacheKey key = {
        *programId,
        0
    };

    if (attributes)
        key.hash = hash_data(attributes, sizeof(SceGxmVertexAttribute) * attributeCount);

    if (streams)
        key.hash ^= hash_data(streams, sizeof(SceGxmVertexStream) * streamCount);

    auto &cache = shaderPatcher->vertex_program_cache;
    const auto hit = cache.find(key);
    if (hit != cache.end()) {
        // Reuse the cached program and account for the additional user.
        ++hit->second.get(mem)->reference_count;
        *vertexProgram = hit->second;
        return 0;
    }

    *vertexProgram = alloc_callbacked<SceGxmVertexProgram>(emuenv, thread_id, shaderPatcher);
    assert(*vertexProgram);
    if (!*vertexProgram)
        return RET_ERROR(SCE_GXM_ERROR_OUT_OF_MEMORY);

    SceGxmVertexProgram *const new_program = vertexProgram->get(mem);
    new_program->program = programId->program;
    new_program->key_hash = key.hash;

    // Keep private copies of the stream and attribute descriptions.
    if (streams && streamCount > 0)
        new_program->streams.insert(new_program->streams.end(), streams, streams + streamCount);

    if (attributes && attributeCount > 0)
        new_program->attributes.insert(new_program->attributes.end(), attributes, attributes + attributeCount);

    const bool created = renderer::create(new_program->renderer_data, *emuenv.renderer, *programId->program.get(mem), emuenv.renderer->gxp_ptr_map, new_program->attributes);
    if (!created)
        return RET_ERROR(SCE_GXM_ERROR_DRIVER);

    cache.emplace(key, *vertexProgram);

    return 0;
}

EXPORT(int, sceGxmShaderPatcherDestroy, Ptr<SceGxmShaderPatcher> shaderPatcher) {
    TRACY_FUNC(sceGxmShaderPatcherDestroy, shaderPatcher);
    if (!shaderPatcher)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    free_callbacked(emuenv, thread_id, shaderPatcher.get(emuenv.mem), shaderPatcher);

    return 0;
}

EXPORT(int, sceGxmShaderPatcherForceUnregisterProgram, SceGxmShaderPatcher *shaderPatcher, SceGxmShaderPatcherId programId) {
    TRACY_FUNC(sceGxmShaderPatcherForceUnregisterProgram, shaderPatcher, programId);
    if (!shaderPatcher || !programId) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    SceGxmRegisteredProgram *rp = programId.get(emuenv.mem);

    // look for existing programs and free them
    if (rp->program.get(emuenv.mem)->is_vertex()) {
        for (auto it = shaderPatcher->vertex_program_cache.begin(); it != shaderPatcher->vertex_program_cache.end();) {
            if (it->first.vertex_program.program == rp->program) {
                SceGxmVertexProgram *vertex_program = it->second.get(emuenv.mem);
                while (vertex_program->compile_threads_on.load(std::memory_order_acquire) > 0)
                    std::this_thread::yield();

                free_callbacked(emuenv, thread_id, shaderPatcher, it->second.address());
                it = shaderPatcher->vertex_program_cache.erase(it);
            } else {
                ++it;
            }
        }
    } else {
        for (auto it = shaderPatcher->fragment_program_cache.begin(); it != shaderPatcher->fragment_program_cache.end();) {
            if (it->first.fragment_program.program == rp->program) {
                SceGxmFragmentProgram *frag_program = it->second.get(emuenv.mem);
                while (frag_program->compile_threads_on.load(std::memory_order_acquire) > 0)
                    std::this_thread::yield();

                free_callbacked(emuenv, thread_id, shaderPatcher, it->second.address());
                it = shaderPatcher->fragment_program_cache.erase(it);
            } else {
                ++it;
            }
        }
    }

    rp->program.reset();
    free_callbacked(emuenv, thread_id, shaderPatcher, programId);

    return 0;
}

// Stub: the amount of buffer memory allocated by a shader patcher is not tracked.
// shaderPatcher is not inspected; the function returns the UNIMPLEMENTED() result.
EXPORT(uint32_t, sceGxmShaderPatcherGetBufferMemAllocated, const SceGxmShaderPatcher *shaderPatcher) {
    TRACY_FUNC(sceGxmShaderPatcherGetBufferMemAllocated, shaderPatcher);
    return UNIMPLEMENTED();
}

// Writes the current reference count of a fragment program to *refCount.
// Returns SCE_GXM_ERROR_INVALID_POINTER if any of the three pointers is null, 0 otherwise.
EXPORT(int, sceGxmShaderPatcherGetFragmentProgramRefCount, const SceGxmShaderPatcher *shaderPatcher, const SceGxmFragmentProgram *fragmentProgram, uint32_t *refCount) {
    TRACY_FUNC(sceGxmShaderPatcherGetFragmentProgramRefCount, shaderPatcher, fragmentProgram, refCount);
    const bool has_null_argument = !shaderPatcher || !fragmentProgram || !refCount;
    if (has_null_argument)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    *refCount = fragmentProgram->reference_count;
    return 0;
}

// Stub: the amount of fragment USSE memory allocated by a shader patcher is not tracked.
// shaderPatcher is not inspected; the function returns the UNIMPLEMENTED() result.
EXPORT(uint32_t, sceGxmShaderPatcherGetFragmentUsseMemAllocated, const SceGxmShaderPatcher *shaderPatcher) {
    TRACY_FUNC(sceGxmShaderPatcherGetFragmentUsseMemAllocated, shaderPatcher);
    return UNIMPLEMENTED();
}

// Stub: the amount of host memory allocated by a shader patcher is not tracked.
// shaderPatcher is not inspected; the function returns the UNIMPLEMENTED() result.
EXPORT(uint32_t, sceGxmShaderPatcherGetHostMemAllocated, const SceGxmShaderPatcher *shaderPatcher) {
    TRACY_FUNC(sceGxmShaderPatcherGetHostMemAllocated, shaderPatcher);
    return UNIMPLEMENTED();
}

// Returns the program stored in a registration, or a null pointer when programId is null.
EXPORT(Ptr<const SceGxmProgram>, sceGxmShaderPatcherGetProgramFromId, SceGxmShaderPatcherId programId) {
    TRACY_FUNC(sceGxmShaderPatcherGetProgramFromId, programId);
    if (programId) {
        return programId.get(emuenv.mem)->program;
    }

    return Ptr<const SceGxmProgram>();
}

// Returns the user data pointer stored in the patcher's parameters, or a null pointer when
// shaderPatcher is null.
EXPORT(Ptr<void>, sceGxmShaderPatcherGetUserData, const SceGxmShaderPatcher *shaderPatcher) {
    TRACY_FUNC(sceGxmShaderPatcherGetUserData, shaderPatcher);
    if (shaderPatcher) {
        return shaderPatcher->params.userData;
    }
    return Ptr<void>(0);
}

// Writes the current reference count of a vertex program to *refCount.
// Returns SCE_GXM_ERROR_INVALID_POINTER if any of the three pointers is null, 0 otherwise.
EXPORT(int, sceGxmShaderPatcherGetVertexProgramRefCount, const SceGxmShaderPatcher *shaderPatcher, const SceGxmVertexProgram *vertexProgram, uint32_t *refCount) {
    TRACY_FUNC(sceGxmShaderPatcherGetVertexProgramRefCount, shaderPatcher, vertexProgram, refCount);
    const bool has_null_argument = !shaderPatcher || !vertexProgram || !refCount;
    if (has_null_argument)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    *refCount = vertexProgram->reference_count;
    return 0;
}

// Stub: the amount of vertex USSE memory allocated by a shader patcher is not tracked.
// shaderPatcher is not inspected; the function returns the UNIMPLEMENTED() result.
EXPORT(uint32_t, sceGxmShaderPatcherGetVertexUsseMemAllocated, const SceGxmShaderPatcher *shaderPatcher) {
    TRACY_FUNC(sceGxmShaderPatcherGetVertexUsseMemAllocated, shaderPatcher);
    return UNIMPLEMENTED();
}

EXPORT(int, sceGxmShaderPatcherRegisterProgram, SceGxmShaderPatcher *shaderPatcher, Ptr<const SceGxmProgram> programHeader, SceGxmShaderPatcherId *programId) {
    TRACY_FUNC(sceGxmShaderPatcherRegisterProgram, shaderPatcher, programHeader, programId);
    if (!shaderPatcher || !programHeader || !programId)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    *programId = alloc_callbacked<SceGxmRegisteredProgram>(emuenv, thread_id, shaderPatcher);
    assert(*programId);
    if (!*programId) {
        return RET_ERROR(SCE_GXM_ERROR_OUT_OF_MEMORY);
    }

    SceGxmRegisteredProgram *const rp = programId->get(emuenv.mem);
    rp->program = programHeader;

    return 0;
}

EXPORT(int, sceGxmShaderPatcherReleaseFragmentProgram, SceGxmShaderPatcher *shaderPatcher, Ptr<SceGxmFragmentProgram> fragmentProgram) {
    TRACY_FUNC(sceGxmShaderPatcherReleaseFragmentProgram, shaderPatcher, fragmentProgram);
    if (!shaderPatcher || !fragmentProgram)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    SceGxmFragmentProgram *const fp = fragmentProgram.get(emuenv.mem);
    --fp->reference_count;
    if (fp->reference_count == 0) {
        while (fp->compile_threads_on.load(std::memory_order_acquire) > 0)
            std::this_thread::yield();

        for (FragmentProgramCache::const_iterator it = shaderPatcher->fragment_program_cache.begin(); it != shaderPatcher->fragment_program_cache.end(); ++it) {
            if (it->second == fragmentProgram) {
                shaderPatcher->fragment_program_cache.erase(it);
                break;
            }
        }
        free_callbacked(emuenv, thread_id, shaderPatcher, fragmentProgram);
    }

    return 0;
}

EXPORT(int, sceGxmShaderPatcherReleaseVertexProgram, SceGxmShaderPatcher *shaderPatcher, Ptr<SceGxmVertexProgram> vertexProgram) {
    TRACY_FUNC(sceGxmShaderPatcherReleaseVertexProgram, shaderPatcher, vertexProgram);
    if (!shaderPatcher || !vertexProgram)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    SceGxmVertexProgram *const vp = vertexProgram.get(emuenv.mem);
    --vp->reference_count;
    if (vp->reference_count == 0) {
        while (vp->compile_threads_on.load(std::memory_order_acquire) > 0)
            std::this_thread::yield();

        for (VertexProgramCache::const_iterator it = shaderPatcher->vertex_program_cache.begin(); it != shaderPatcher->vertex_program_cache.end(); ++it) {
            if (it->second == vertexProgram) {
                shaderPatcher->vertex_program_cache.erase(it);
                break;
            }
        }
        free_callbacked(emuenv, thread_id, shaderPatcher, vertexProgram);
    }

    return 0;
}

// Stub: takes no arguments in this implementation and returns the UNIMPLEMENTED() result.
EXPORT(int, sceGxmShaderPatcherSetAuxiliarySurface) {
    TRACY_FUNC(sceGxmShaderPatcherSetAuxiliarySurface);
    return UNIMPLEMENTED();
}

// Stores userData in the patcher's parameters.
// Returns SCE_GXM_ERROR_INVALID_POINTER if shaderPatcher is null, 0 otherwise.
EXPORT(int, sceGxmShaderPatcherSetUserData, SceGxmShaderPatcher *shaderPatcher, Ptr<void> userData) {
    TRACY_FUNC(sceGxmShaderPatcherSetUserData, shaderPatcher, userData);
    if (shaderPatcher) {
        shaderPatcher->params.userData = userData;
        return 0;
    }
    return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
}

EXPORT(int, sceGxmShaderPatcherUnregisterProgram, SceGxmShaderPatcher *shaderPatcher, SceGxmShaderPatcherId programId) {
    TRACY_FUNC(sceGxmShaderPatcherUnregisterProgram, shaderPatcher, programId);
    if (!shaderPatcher || !programId)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    SceGxmRegisteredProgram *const rp = programId.get(emuenv.mem);
    rp->program.reset();

    free_callbacked(emuenv, thread_id, shaderPatcher, programId);

    return 0;
}

EXPORT(int, sceGxmSyncObjectCreate, Ptr<SceGxmSyncObject> *syncObject) {
    TRACY_FUNC(sceGxmSyncObjectCreate, syncObject);
    if (!syncObject)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    *syncObject = alloc<SceGxmSyncObject>(emuenv.mem, __FUNCTION__);
    if (!*syncObject) {
        return RET_ERROR(SCE_GXM_ERROR_OUT_OF_MEMORY);
    }

    renderer::create(syncObject->get(emuenv.mem), *emuenv.renderer);

    return 0;
}

EXPORT(int, sceGxmSyncObjectDestroy, Ptr<SceGxmSyncObject> syncObject) {
    TRACY_FUNC(sceGxmSyncObjectDestroy, syncObject);
    if (!syncObject)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    renderer::destroy(syncObject.get(emuenv.mem), *emuenv.renderer);
    free(emuenv.mem, syncObject);

    return 0;
}

// Shuts GXM down: drains and aborts the display queue, then asks the display queue thread to
// exit and delete itself. Always returns 0.
EXPORT(int, sceGxmTerminate) {
    TRACY_FUNC(sceGxmTerminate);

    // Everything queued for the SDL side has to be finished before the Vita thread is killed.
    auto &gxm_state = emuenv.gxm;
    gxm_state.display_queue.wait_empty();
    gxm_state.display_queue.abort();

    emuenv.kernel.get_thread(gxm_state.display_queue_thread)->exit_delete();
    return 0;
}

// Returns the guest pointer to the texture data, rebuilt by shifting the packed data_addr field
// left by two bits. texture must not be null (asserted).
EXPORT(Ptr<void>, sceGxmTextureGetData, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetData, texture);
    assert(texture);
    const auto data_address = texture->data_addr << 2;
    return Ptr<void>(data_address);
}

// Returns the texture format as computed by gxm::get_format. texture must not be null (asserted).
EXPORT(SceGxmTextureFormat, sceGxmTextureGetFormat, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetFormat, texture);
    assert(texture);
    const SceGxmTextureFormat format = gxm::get_format(*texture);
    return format;
}

// Returns the gamma mode with the stored gamma_mode field shifted left by 27 bits.
// texture must not be null (asserted).
EXPORT(int, sceGxmTextureGetGammaMode, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetGammaMode, texture);
    assert(texture);
    const auto gamma_bits = texture->gamma_mode << 27;
    return gamma_bits;
}

// Returns the texture height as computed by gxm::get_height. texture must not be null (asserted).
EXPORT(uint32_t, sceGxmTextureGetHeight, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetHeight, texture);
    assert(texture);
    const uint32_t height = gxm::get_height(*texture);
    return height;
}

// Returns the stored LOD bias, or 0 for linear strided textures.
// A null texture yields SCE_GXM_ERROR_INVALID_POINTER.
EXPORT(uint32_t, sceGxmTextureGetLodBias, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetLodBias, texture);
    if (!texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if ((texture->type << 29) != SCE_GXM_TEXTURE_LINEAR_STRIDED) {
        return texture->lod_bias;
    }

    return 0;
}

// Returns the minimum LOD, reassembled from its two stored parts (lod_min0 holds the low two
// bits, lod_min1 the rest), or 0 for linear strided textures.
// A null texture yields SCE_GXM_ERROR_INVALID_POINTER.
EXPORT(uint32_t, sceGxmTextureGetLodMin, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetLodMin, texture);
    if (!texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if ((texture->type << 29) != SCE_GXM_TEXTURE_LINEAR_STRIDED) {
        return texture->lod_min0 | (texture->lod_min1 << 2);
    }

    return 0;
}

// Returns the stored magnification filter. texture must not be null (asserted).
EXPORT(int, sceGxmTextureGetMagFilter, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetMagFilter, texture);
    assert(texture);
    const auto filter = texture->mag_filter;
    return filter;
}

// Returns the minification filter. Linear strided textures report their magnification filter
// instead of the min_filter field. texture must not be null (asserted).
EXPORT(int, sceGxmTextureGetMinFilter, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetMinFilter, texture);
    assert(texture);
    if ((texture->type << 29) != SCE_GXM_TEXTURE_LINEAR_STRIDED) {
        return texture->min_filter;
    }
    return texture->mag_filter;
}

// Returns whether mip filtering is enabled. Linear strided textures always report it as
// disabled. texture must not be null (asserted).
EXPORT(SceGxmTextureMipFilter, sceGxmTextureGetMipFilter, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetMipFilter, texture);
    assert(texture);
    if ((texture->type << 29) == SCE_GXM_TEXTURE_LINEAR_STRIDED) {
        return SCE_GXM_TEXTURE_MIP_FILTER_DISABLED;
    }
    if (texture->mip_filter) {
        return SCE_GXM_TEXTURE_MIP_FILTER_ENABLED;
    }
    return SCE_GXM_TEXTURE_MIP_FILTER_DISABLED;
}

// Returns the mipmap count, i.e. the stored mip_count plus one, wrapped to four bits.
// Linear strided textures report 0. texture must not be null (asserted).
EXPORT(uint32_t, sceGxmTextureGetMipmapCount, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetMipmapCount, texture);
    assert(texture);
    if ((texture->type << 29) != SCE_GXM_TEXTURE_LINEAR_STRIDED) {
        return (texture->mip_count + 1) & 0xf;
    }
    return 0;
}

// Returns the stored mip_count plus one, wrapped to four bits, without the linear strided check
// that sceGxmTextureGetMipmapCount performs. texture must not be null (asserted).
EXPORT(uint32_t, sceGxmTextureGetMipmapCountUnsafe, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetMipmapCountUnsafe, texture);
    assert(texture);
    const auto mipmap_count = (texture->mip_count + 1) & 0xf;
    return mipmap_count;
}

// Returns the normalize mode with the stored normalize_mode field shifted left by 31 bits.
// texture must not be null (asserted).
EXPORT(int, sceGxmTextureGetNormalizeMode, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetNormalizeMode, texture);
    assert(texture);
    const auto normalize_bits = texture->normalize_mode << 31;
    return normalize_bits;
}

// Returns the guest pointer to the texture's palette (the stored palette_addr shifted left by
// six bits) when the texture's base format is paletted. Returns a null pointer for non-paletted
// formats and for a null texture.
EXPORT(Ptr<void>, sceGxmTextureGetPalette, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetPalette, texture);
    // Unlike the other getters this one had no guard at all before dereferencing the texture.
    if (!texture) {
        return Ptr<void>();
    }

    const auto base_format = gxm::get_base_format(gxm::get_format(*texture));

    return gxm::is_paletted_format(base_format) ? Ptr<void>(texture->palette_addr << 6) : Ptr<void>();
}

// Returns the stride in bytes (via gxm::get_stride_in_bytes) of a linear strided texture and 0
// for every other texture type. texture must not be null (asserted).
EXPORT(uint32_t, sceGxmTextureGetStride, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetStride, texture);
    assert(texture);
    if (texture->texture_type() == SCE_GXM_TEXTURE_LINEAR_STRIDED) {
        return gxm::get_stride_in_bytes(*texture);
    }

    return 0;
}

// Returns the texture type with the stored type field shifted left by 29 bits, which is the form
// the SCE_GXM_TEXTURE_* type constants are compared against in this file.
// texture must not be null (asserted).
EXPORT(int, sceGxmTextureGetType, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetType, texture);
    assert(texture);
    const auto type_bits = texture->type << 29;
    return type_bits;
}

// Returns the stored U addressing mode without any texture type check.
// texture must not be null (asserted).
EXPORT(int, sceGxmTextureGetUAddrMode, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetUAddrMode, texture);
    assert(texture);
    const auto u_mode = texture->uaddr_mode;
    return u_mode;
}

// Returns the U addressing mode; linear strided textures always report
// SCE_GXM_TEXTURE_ADDR_CLAMP. texture must not be null (asserted).
EXPORT(int, sceGxmTextureGetUAddrModeSafe, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetUAddrModeSafe, texture);
    assert(texture);
    if ((texture->type << 29) != SCE_GXM_TEXTURE_LINEAR_STRIDED) {
        return texture->uaddr_mode;
    }
    return SCE_GXM_TEXTURE_ADDR_CLAMP;
}

// Returns the stored V addressing mode without any texture type check.
// texture must not be null (asserted).
EXPORT(int, sceGxmTextureGetVAddrMode, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetVAddrMode, texture);
    assert(texture);
    const auto v_mode = texture->vaddr_mode;
    return v_mode;
}

// Returns the V addressing mode; linear strided textures always report
// SCE_GXM_TEXTURE_ADDR_CLAMP. texture must not be null (asserted).
EXPORT(int, sceGxmTextureGetVAddrModeSafe, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetVAddrModeSafe, texture);
    assert(texture);
    if ((texture->type << 29) != SCE_GXM_TEXTURE_LINEAR_STRIDED) {
        return texture->vaddr_mode;
    }
    return SCE_GXM_TEXTURE_ADDR_CLAMP;
}

// Returns the texture width as computed by gxm::get_width. texture must not be null (asserted).
EXPORT(uint32_t, sceGxmTextureGetWidth, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureGetWidth, texture);
    assert(texture);
    const uint32_t width = gxm::get_width(*texture);
    return width;
}

// Initialises a cube texture through init_texture_base.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture and SCE_GXM_ERROR_INVALID_ALIGNMENT
// when width or height is zero or not a power of two; otherwise the init_texture_base result.
EXPORT(int, sceGxmTextureInitCube, SceGxmTexture *texture, Ptr<const void> data, SceGxmTextureFormat texFormat, uint32_t width, uint32_t height, uint32_t mipCount) {
    TRACY_FUNC(sceGxmTextureInitCube, texture, data, texFormat, width, height, mipCount);
    if (!texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    // A non-zero value is a power of two exactly when clearing its lowest set bit leaves nothing.
    const auto is_power_of_two = [](uint32_t value) {
        return value != 0 && (value & (value - 1)) == 0;
    };
    if (!is_power_of_two(width) || !is_power_of_two(height)) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_ALIGNMENT);
    }

    return init_texture_base(export_name, texture, data, texFormat, width, height, mipCount, SCE_GXM_TEXTURE_CUBE);
}

// Initialises a cube texture of arbitrary (not necessarily power-of-two) size through
// init_texture_base. Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, otherwise the
// init_texture_base result.
EXPORT(int, sceGxmTextureInitCubeArbitrary, SceGxmTexture *texture, Ptr<const void> data, SceGxmTextureFormat texFormat, uint32_t width, uint32_t height, uint32_t mipCount) {
    TRACY_FUNC(sceGxmTextureInitCubeArbitrary, texture, data, texFormat, width, height, mipCount);
    if (!texture)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    return init_texture_base(export_name, texture, data, texFormat, width, height, mipCount, SCE_GXM_TEXTURE_CUBE_ARBITRARY);
}

// Initialises a linear texture through init_texture_base.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, otherwise the init_texture_base result.
EXPORT(int, sceGxmTextureInitLinear, SceGxmTexture *texture, Ptr<const void> data, SceGxmTextureFormat texFormat, uint32_t width, uint32_t height, uint32_t mipCount) {
    TRACY_FUNC(sceGxmTextureInitLinear, texture, data, texFormat, width, height, mipCount);
    if (!texture)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    return init_texture_base(export_name, texture, data, texFormat, width, height, mipCount, SCE_GXM_TEXTURE_LINEAR);
}

EXPORT(int, sceGxmTextureInitLinearStrided, SceGxmTexture *texture, Ptr<const void> data, SceGxmTextureFormat texFormat, uint32_t width, uint32_t height, uint32_t byteStride) {
    TRACY_FUNC(sceGxmTextureInitLinearStrided, texture, data, texFormat, width, height, byteStride);
    if (!texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if ((width > 4096) || (height > 4096)) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    if (byteStride & 3) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_ALIGNMENT);
    }

    if ((byteStride < 4) || (byteStride > 131072)) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    memset(texture, 0, sizeof(SceGxmTexture));

    const uint32_t stride_compressed = (byteStride >> 2) - 1;
    texture->mip_filter = stride_compressed & 1;
    texture->min_filter = (stride_compressed & 0b0000110) >> 1;
    texture->mip_count = (stride_compressed & 0b1111000) >> 3;
    texture->lod_bias = (stride_compressed & 0b1111110000000) >> 7;
    texture->base_format = (texFormat & 0x1F000000) >> 24;
    texture->type = SCE_GXM_TEXTURE_LINEAR_STRIDED >> 29;
    texture->data_addr = data.address() >> 2;
    texture->swizzle_format = (texFormat & 0x7000) >> 12;
    texture->normalize_mode = 1;
    texture->format0 = (texFormat & 0x80000000) >> 31;
    texture->uaddr_mode = texture->vaddr_mode = SCE_GXM_TEXTURE_ADDR_CLAMP;
    texture->height = height - 1;
    texture->width = width - 1;
    texture->mag_filter = SCE_GXM_TEXTURE_FILTER_POINT;
    texture->gamma_mode = 0;
    texture->lod_min0 = 0;
    texture->lod_min1 = 0;

    return 0;
}

// Initialises a swizzled texture through init_texture_base.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture and SCE_GXM_ERROR_INVALID_ALIGNMENT
// when width or height is zero or not a power of two; otherwise the init_texture_base result.
EXPORT(int, sceGxmTextureInitSwizzled, SceGxmTexture *texture, Ptr<const void> data, SceGxmTextureFormat texFormat, uint32_t width, uint32_t height, uint32_t mipCount) {
    TRACY_FUNC(sceGxmTextureInitSwizzled, texture, data, texFormat, width, height, mipCount);
    if (!texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    // A non-zero value is a power of two exactly when clearing its lowest set bit leaves nothing.
    const auto is_power_of_two = [](uint32_t value) {
        return value != 0 && (value & (value - 1)) == 0;
    };
    if (!is_power_of_two(width) || !is_power_of_two(height)) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_ALIGNMENT);
    }

    return init_texture_base(export_name, texture, data, texFormat, width, height, mipCount, SCE_GXM_TEXTURE_SWIZZLED);
}

// Initialises a swizzled texture of arbitrary (not necessarily power-of-two) size through
// init_texture_base. Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, otherwise the
// init_texture_base result.
EXPORT(int, sceGxmTextureInitSwizzledArbitrary, SceGxmTexture *texture, Ptr<const void> data, SceGxmTextureFormat texFormat, uint32_t width, uint32_t height, uint32_t mipCount) {
    TRACY_FUNC(sceGxmTextureInitSwizzledArbitrary, texture, data, texFormat, width, height, mipCount);
    if (!texture)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    return init_texture_base(export_name, texture, data, texFormat, width, height, mipCount, SCE_GXM_TEXTURE_SWIZZLED_ARBITRARY);
}

// Initialises a tiled texture through init_texture_base.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, otherwise the init_texture_base result.
EXPORT(int, sceGxmTextureInitTiled, SceGxmTexture *texture, Ptr<const void> data, SceGxmTextureFormat texFormat, uint32_t width, uint32_t height, uint32_t mipCount) {
    TRACY_FUNC(sceGxmTextureInitTiled, texture, data, texFormat, width, height, mipCount);
    if (!texture)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    return init_texture_base(export_name, texture, data, texFormat, width, height, mipCount, SCE_GXM_TEXTURE_TILED);
}

// Points the texture at new pixel data. The address is stored shifted right by two bits.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, 0 otherwise.
EXPORT(int, sceGxmTextureSetData, SceGxmTexture *texture, Ptr<const void> data) {
    TRACY_FUNC(sceGxmTextureSetData, texture, data);
    if (!texture)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    const auto data_address = data.address();
    texture->data_addr = data_address >> 2;
    return 0;
}

// Changes the texture format. The format value is split into the three stored fields:
// bit 31 -> format0, bits 24-28 -> base_format, bits 12-14 -> swizzle_format.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, SCE_KERNEL_OK otherwise.
EXPORT(int, sceGxmTextureSetFormat, SceGxmTexture *texture, SceGxmTextureFormat texFormat) {
    TRACY_FUNC(sceGxmTextureSetFormat, texture, texFormat);
    if (!texture)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    texture->format0 = (texFormat & 0x80000000) >> 31;
    texture->base_format = (texFormat & 0x1F000000) >> 24;
    texture->swizzle_format = (texFormat & 0x7000) >> 12;

    return SCE_KERNEL_OK;
}

// Sets the gamma mode. The incoming value is shifted right by 27 bits before being stored, the
// inverse of what sceGxmTextureGetGammaMode does.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, 0 otherwise.
EXPORT(int, sceGxmTextureSetGammaMode, SceGxmTexture *texture, SceGxmTextureGammaMode gammaMode) {
    TRACY_FUNC(sceGxmTextureSetGammaMode, texture, gammaMode);
    if (!texture)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    const std::uint32_t packed_mode = static_cast<std::uint32_t>(gammaMode) >> 27;
    texture->gamma_mode = packed_mode;
    return 0;
}

// Changes the height of a texture (stored as height - 1).
//
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture and SCE_GXM_ERROR_INVALID_VALUE when
// height exceeds 4096. Tiled textures are only accepted when the mip-level test below passes and
// otherwise fail with SCE_GXM_ERROR_INVALID_VALUE. Swizzled textures are not supported yet: a
// warning is logged, nothing is stored and 0 is returned. Every other type stores the new height.
//
// NOTE(review): height is at most 4096 here, so shifting it right by 0x1F always yields 0 and the
// tiled branch can never succeed as written - confirm the intended condition.
EXPORT(int, sceGxmTextureSetHeight, SceGxmTexture *texture, uint32_t height) {
    TRACY_FUNC(sceGxmTextureSetHeight, texture, height);
    if (!texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }
    if (height > 4096) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    const auto texture_type = texture->type << 29;
    if (texture_type == SCE_GXM_TEXTURE_TILED) {
        const bool accepted = (texture->mip_count > 1) && ((height >> (texture->mip_count - 1)) >> 0x1F);
        if (!accepted) {
            return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
        }
    } else if (texture_type == SCE_GXM_TEXTURE_SWIZZLED) {
        // TODO: Add support for swizzled textures
        LOG_WARN("Unimplemented texture format detected in sceGxmTextureSetHeight call.");
        return 0;
    }

    texture->height = height - 1;
    return 0;
}

// Underscore-prefixed alias: forwards both arguments unchanged to sceGxmTextureSetHeight and
// returns its result.
EXPORT(int, _sceGxmTextureSetHeight, SceGxmTexture *texture, uint32_t height) {
    TRACY_FUNC(_sceGxmTextureSetHeight, texture, height);
    return CALL_EXPORT(sceGxmTextureSetHeight, texture, height);
}

// Sets the LOD bias of a texture.
// Errors, in order: null texture -> SCE_GXM_ERROR_INVALID_POINTER; linear strided texture ->
// SCE_GXM_ERROR_UNSUPPORTED; bias above 63 -> SCE_GXM_ERROR_INVALID_VALUE. Returns 0 on success.
EXPORT(int, sceGxmTextureSetLodBias, SceGxmTexture *texture, uint32_t bias) {
    TRACY_FUNC(sceGxmTextureSetLodBias, texture, bias);
    if (!texture)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    const bool is_strided = (texture->type << 29) == SCE_GXM_TEXTURE_LINEAR_STRIDED;
    if (is_strided)
        return RET_ERROR(SCE_GXM_ERROR_UNSUPPORTED);

    if (bias > 63)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    texture->lod_bias = bias;
    return 0;
}

// Sets the minimum LOD of a texture. The value is stored in two parts: its low two bits in
// lod_min0 and the remaining bits in lod_min1.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, SCE_GXM_ERROR_UNSUPPORTED for linear
// strided textures, 0 otherwise.
EXPORT(int, sceGxmTextureSetLodMin, SceGxmTexture *texture, uint32_t lodMin) {
    TRACY_FUNC(sceGxmTextureSetLodMin, texture, lodMin);
    if (!texture)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    const bool is_strided = (texture->type << 29) == SCE_GXM_TEXTURE_LINEAR_STRIDED;
    if (is_strided)
        return RET_ERROR(SCE_GXM_ERROR_UNSUPPORTED);

    const uint32_t low_part = lodMin & 3;
    const uint32_t high_part = lodMin >> 2;
    texture->lod_min0 = low_part;
    texture->lod_min1 = high_part;

    return 0;
}

// Sets the magnification filter; allowed for every texture type.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, 0 otherwise.
EXPORT(int, sceGxmTextureSetMagFilter, SceGxmTexture *texture, SceGxmTextureFilter magFilter) {
    TRACY_FUNC(sceGxmTextureSetMagFilter, texture, magFilter);
    if (!texture)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    texture->mag_filter = static_cast<uint32_t>(magFilter);
    return 0;
}

// Sets the minification filter.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, SCE_GXM_ERROR_UNSUPPORTED for linear
// strided textures, 0 otherwise.
EXPORT(int, sceGxmTextureSetMinFilter, SceGxmTexture *texture, SceGxmTextureFilter minFilter) {
    TRACY_FUNC(sceGxmTextureSetMinFilter, texture, minFilter);
    if (!texture)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    const bool is_strided = (texture->type << 29) == SCE_GXM_TEXTURE_LINEAR_STRIDED;
    if (is_strided)
        return RET_ERROR(SCE_GXM_ERROR_UNSUPPORTED);

    texture->min_filter = static_cast<uint32_t>(minFilter);
    return 0;
}

// Enables or disables mip filtering; any non-zero mipFilter value is stored as "enabled".
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, SCE_GXM_ERROR_UNSUPPORTED for linear
// strided textures, 0 otherwise.
EXPORT(int, sceGxmTextureSetMipFilter, SceGxmTexture *texture, SceGxmTextureMipFilter mipFilter) {
    TRACY_FUNC(sceGxmTextureSetMipFilter, texture, mipFilter);
    if (!texture)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    const bool is_strided = (texture->type << 29) == SCE_GXM_TEXTURE_LINEAR_STRIDED;
    if (is_strided)
        return RET_ERROR(SCE_GXM_ERROR_UNSUPPORTED);

    const bool filter_enabled = static_cast<bool>(mipFilter);
    texture->mip_filter = filter_enabled;
    return 0;
}

// Sets the mipmap count; the field stores the count minus one, capped at 15.
// Errors, in order: null texture -> SCE_GXM_ERROR_INVALID_POINTER; linear strided texture ->
// SCE_GXM_ERROR_UNSUPPORTED; mipCount above 13 -> SCE_GXM_ERROR_INVALID_VALUE. Returns 0 on success.
// A mipCount of 0 wraps around in the unsigned subtraction and is therefore stored as 15.
EXPORT(int, sceGxmTextureSetMipmapCount, SceGxmTexture *texture, uint32_t mipCount) {
    TRACY_FUNC(sceGxmTextureSetMipmapCount, texture, mipCount);
    if (!texture)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    const bool is_strided = (texture->type << 29) == SCE_GXM_TEXTURE_LINEAR_STRIDED;
    if (is_strided)
        return RET_ERROR(SCE_GXM_ERROR_UNSUPPORTED);

    if (mipCount > 13)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    const std::uint32_t stored_count = std::min<std::uint32_t>(15, mipCount - 1);
    texture->mip_count = stored_count;
    return 0;
}

// Sets the normalize mode. The incoming value is shifted right by 31 bits before being stored.
// A zero normalizeMode only triggers a warning, because unnormalized textures are not
// implemented. Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, 0 otherwise.
EXPORT(int, sceGxmTextureSetNormalizeMode, SceGxmTexture *texture, SceGxmTextureNormalizeMode normalizeMode) {
    TRACY_FUNC(sceGxmTextureSetNormalizeMode, texture, normalizeMode);
    if (!texture)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    if (!normalizeMode) {
        LOG_WARN("Unimplemented unnormalized texture, please report it to a developper");
    }

    const std::uint32_t packed_mode = static_cast<std::uint32_t>(normalizeMode) >> 31;
    texture->normalize_mode = packed_mode;
    return 0;
}

// Points the texture at a palette. The palette address must be 64-byte aligned and is stored
// shifted right by six bits.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, SCE_GXM_ERROR_INVALID_ALIGNMENT for a
// misaligned palette address, 0 otherwise.
EXPORT(int, sceGxmTextureSetPalette, SceGxmTexture *texture, Ptr<const void> paletteData) {
    TRACY_FUNC(sceGxmTextureSetPalette, texture, paletteData);
    if (!texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    const auto palette_address = paletteData.address();
    if (palette_address & 0x3F) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_ALIGNMENT);
    }

    texture->palette_addr = (palette_address >> 6);
    return 0;
}

// Validates a stride change request; the change itself is not implemented.
// Errors, in order: null texture -> SCE_GXM_ERROR_INVALID_POINTER; texture is not linear strided
// -> SCE_GXM_ERROR_UNSUPPORTED; byteStride not a multiple of 4 -> SCE_GXM_ERROR_INVALID_ALIGNMENT;
// byteStride outside [4, 131072] -> SCE_GXM_ERROR_INVALID_VALUE. A request that passes every
// check returns the UNIMPLEMENTED() result and leaves the texture untouched.
EXPORT(int, sceGxmTextureSetStride, SceGxmTexture *texture, uint32_t byteStride) {
    TRACY_FUNC(sceGxmTextureSetStride, texture, byteStride);
    if (!texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    const bool is_strided = (texture->type << 29) == SCE_GXM_TEXTURE_LINEAR_STRIDED;
    if (!is_strided) {
        return RET_ERROR(SCE_GXM_ERROR_UNSUPPORTED);
    }

    if (byteStride & 3) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_ALIGNMENT);
    }

    if ((byteStride < 4) || (byteStride > 131072)) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    return UNIMPLEMENTED();
}

// Tells whether an addressing mode may be used with the given texture:
//  - cube and arbitrary cube textures only accept SCE_GXM_TEXTURE_ADDR_CLAMP;
//  - swizzled textures accept every mode;
//  - all other types reject SCE_GXM_TEXTURE_ADDR_MIRROR and the modes in the range
//    [SCE_GXM_TEXTURE_ADDR_REPEAT_IGNORE_BORDER, SCE_GXM_TEXTURE_ADDR_CLAMP_HALF_BORDER].
static bool verify_texture_mode(SceGxmTexture *texture, SceGxmTextureAddrMode mode) {
    const auto texture_type = texture->type << 29;

    const bool is_cube = texture_type == SCE_GXM_TEXTURE_CUBE || texture_type == SCE_GXM_TEXTURE_CUBE_ARBITRARY;
    if (is_cube) {
        return mode == SCE_GXM_TEXTURE_ADDR_CLAMP;
    }

    if (texture_type == SCE_GXM_TEXTURE_SWIZZLED) {
        return true;
    }

    const bool is_border_mode = mode >= SCE_GXM_TEXTURE_ADDR_REPEAT_IGNORE_BORDER && mode <= SCE_GXM_TEXTURE_ADDR_CLAMP_HALF_BORDER;
    if (is_border_mode) {
        return false;
    }
    return mode != SCE_GXM_TEXTURE_ADDR_MIRROR;
}

// Sets the U addressing mode after checking it against the texture type with verify_texture_mode.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, SCE_GXM_ERROR_UNSUPPORTED when the
// mode is not allowed for this texture, 0 otherwise.
EXPORT(int, sceGxmTextureSetUAddrMode, SceGxmTexture *texture, SceGxmTextureAddrMode mode) {
    TRACY_FUNC(sceGxmTextureSetUAddrMode, texture, mode);
    if (!texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    const bool mode_allowed = verify_texture_mode(texture, mode);
    if (!mode_allowed) {
        return RET_ERROR(SCE_GXM_ERROR_UNSUPPORTED);
    }

    texture->uaddr_mode = mode;
    return 0;
}

// "Safe" variant of sceGxmTextureSetUAddrMode; in this implementation it performs exactly the
// same checks and store.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, SCE_GXM_ERROR_UNSUPPORTED when
// verify_texture_mode rejects the mode, 0 otherwise.
EXPORT(int, sceGxmTextureSetUAddrModeSafe, SceGxmTexture *texture, SceGxmTextureAddrMode mode) {
    TRACY_FUNC(sceGxmTextureSetUAddrModeSafe, texture, mode);
    if (!texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    const bool mode_allowed = verify_texture_mode(texture, mode);
    if (!mode_allowed) {
        return RET_ERROR(SCE_GXM_ERROR_UNSUPPORTED);
    }

    texture->uaddr_mode = mode;
    return 0;
}

// Sets the V addressing mode after checking it against the texture type with verify_texture_mode.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, SCE_GXM_ERROR_UNSUPPORTED when the
// mode is not allowed for this texture, 0 otherwise.
EXPORT(int, sceGxmTextureSetVAddrMode, SceGxmTexture *texture, SceGxmTextureAddrMode mode) {
    TRACY_FUNC(sceGxmTextureSetVAddrMode, texture, mode);
    if (!texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    const bool mode_allowed = verify_texture_mode(texture, mode);
    if (!mode_allowed) {
        return RET_ERROR(SCE_GXM_ERROR_UNSUPPORTED);
    }

    texture->vaddr_mode = mode;
    return 0;
}

// "Safe" variant of sceGxmTextureSetVAddrMode; in this implementation it performs exactly the
// same checks and store.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture, SCE_GXM_ERROR_UNSUPPORTED when
// verify_texture_mode rejects the mode, 0 otherwise.
EXPORT(int, sceGxmTextureSetVAddrModeSafe, SceGxmTexture *texture, SceGxmTextureAddrMode mode) {
    TRACY_FUNC(sceGxmTextureSetVAddrModeSafe, texture, mode);
    if (!texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    const bool mode_allowed = verify_texture_mode(texture, mode);
    if (!mode_allowed) {
        return RET_ERROR(SCE_GXM_ERROR_UNSUPPORTED);
    }

    texture->vaddr_mode = mode;
    return 0;
}

// Changes the width of a texture (stored as width - 1).
//
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null texture and SCE_GXM_ERROR_INVALID_VALUE when
// width exceeds 4096. Tiled textures are only accepted when the mip-level test below passes and
// otherwise fail with SCE_GXM_ERROR_INVALID_VALUE. Swizzled textures are not supported yet: a
// warning is logged, nothing is stored and 0 is returned. Every other type stores the new width.
//
// NOTE(review): width is at most 4096 here, so shifting it right by 0x1F always yields 0 and the
// tiled branch can never succeed as written - confirm the intended condition.
EXPORT(int, sceGxmTextureSetWidth, SceGxmTexture *texture, uint32_t width) {
    TRACY_FUNC(sceGxmTextureSetWidth, texture, width);
    if (!texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }
    if (width > 4096) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
    }

    const auto texture_type = texture->type << 29;
    if (texture_type == SCE_GXM_TEXTURE_TILED) {
        const bool accepted = (texture->mip_count > 1) && ((width >> (texture->mip_count - 1)) >> 0x1F);
        if (!accepted) {
            return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);
        }
    } else if (texture_type == SCE_GXM_TEXTURE_SWIZZLED) {
        // TODO: Add support for swizzled textures
        LOG_WARN("Unimplemented texture format detected in sceGxmTextureSetWidth call.");
        return 0;
    }

    texture->width = width - 1;
    return 0;
}

// Underscore-prefixed alias: forwards both arguments unchanged to sceGxmTextureSetWidth and
// returns its result.
EXPORT(int, _sceGxmTextureSetWidth, SceGxmTexture *texture, uint32_t width) {
    TRACY_FUNC(_sceGxmTextureSetWidth, texture, width);
    return CALL_EXPORT(sceGxmTextureSetWidth, texture, width);
}

// Stub: only the null check is performed. A null texture yields
// SCE_GXM_ERROR_INVALID_POINTER; any other texture is not inspected and the function returns the
// UNIMPLEMENTED() result.
EXPORT(int, sceGxmTextureValidate, const SceGxmTexture *texture) {
    TRACY_FUNC(sceGxmTextureValidate, texture);
    if (!texture) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }
    return UNIMPLEMENTED();
}

EXPORT(int, sceGxmTransferCopy, uint32_t width, uint32_t height, uint32_t colorKeyValue, uint32_t colorKeyMask, SceGxmTransferColorKeyMode colorKeyMode,
    SceGxmTransferFormat srcFormat, SceGxmTransferType srcType, Ptr<void> srcAddress, uint32_t srcX, uint32_t srcY, int32_t srcStride,
    SceGxmTransferFormat destFormat, SceGxmTransferType destType, Ptr<void> destAddress, uint32_t destX, uint32_t destY, int32_t destStride,
    Ptr<SceGxmSyncObject> syncObject, SceGxmTransferFlags syncFlags, const SceGxmNotification *notification) {
    TRACY_FUNC(sceGxmTransferCopy, width, height, colorKeyValue, colorKeyMask, colorKeyMode, srcFormat, srcType, srcAddress, srcX);
#ifdef TRACY_ENABLE
    if (_tracy_activation_state) {
        __TRACY_LOG_ARG_IF(srcY)
        __TRACY_LOG_ARG_IF(srcStride)
        __TRACY_LOG_ARG_IF(destFormat)
        __TRACY_LOG_ARG_IF(destType)
        __TRACY_LOG_ARG_IF(destAddress)
        __TRACY_LOG_ARG_IF(destX)
        __TRACY_LOG_ARG_IF(destY)
        __TRACY_LOG_ARG_IF(destStride)
        __TRACY_LOG_ARG_IF(syncObject)
        __TRACY_LOG_ARG_IF(syncFlags)
        __TRACY_LOG_ARG_IF(notification)
    }
#endif // TRACY_ENABLE

    if (!srcAddress || !destAddress)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    const auto src_type_is_tiled = srcType == SCE_GXM_TRANSFER_TILED;
    const auto src_type_is_swizzled = srcType == SCE_GXM_TRANSFER_SWIZZLED;
    const auto dest_type_is_tiled = destType == SCE_GXM_TRANSFER_TILED;
    const auto dest_type_is_swizzled = destType == SCE_GXM_TRANSFER_SWIZZLED;

    const auto is_invalid_value = (src_type_is_tiled && dest_type_is_swizzled) || (src_type_is_swizzled && dest_type_is_tiled) || (src_type_is_swizzled && dest_type_is_swizzled);
    if (is_invalid_value)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_VALUE);

    uint32_t cmd_timestamp;
    if (syncObject) {
        SceGxmSyncObject *sync = syncObject.get(emuenv.mem);
        renderer::send_single_command(*emuenv.renderer, nullptr, renderer::CommandOpcode::WaitSyncObject, false,
            syncObject, sync->last_display.load());

        cmd_timestamp = ++sync->timestamp_ahead;
        sync->last_operation_global = emuenv.gxm.global_timestamp.fetch_add(1, std::memory_order_relaxed);
    }

    // needed, otherwise the command is not big enough
    SceGxmTransferImage *images = new SceGxmTransferImage[2];

    SceGxmTransferImage *src = &images[0];
    src->format = srcFormat;
    src->address = srcAddress;
    src->x = srcX;
    src->y = srcY;
    src->width = width;
    src->height = height;
    src->stride = srcStride;

    SceGxmTransferImage *dest = &images[1];
    dest->format = destFormat;
    dest->address = destAddress;
    dest->x = destX;
    dest->y = destY;
    dest->width = width;
    dest->height = height;
    dest->stride = destStride;

    renderer::transfer_copy(*emuenv.renderer, colorKeyValue, colorKeyMask, colorKeyMode, images, srcType, destType);

    if (notification)
        renderer::send_single_command(*emuenv.renderer, nullptr, renderer::CommandOpcode::SignalNotification, false, *notification);

    if (syncObject) {
        renderer::send_single_command(*emuenv.renderer, nullptr, renderer::CommandOpcode::SignalSyncObject, false,
            syncObject, cmd_timestamp);
    }

    return 0;
}

EXPORT(int, sceGxmTransferDownscale, SceGxmTransferFormat srcFormat, Ptr<void> srcAddress,
    uint32_t srcX, uint32_t srcY, uint32_t srcWidth, uint32_t srcHeight, int32_t srcStride,
    SceGxmTransferFormat destFormat, Ptr<void> destAddress, uint32_t destX, uint32_t destY, int32_t destStride,
    Ptr<SceGxmSyncObject> syncObject, SceGxmTransferFlags syncFlags, const SceGxmNotification *notification) {
    TRACY_FUNC(sceGxmTransferDownscale, srcFormat, srcX, srcY, srcWidth, srcHeight, srcStride, destFormat, destAddress, destX);
#ifdef TRACY_ENABLE
    if (_tracy_activation_state) {
        __TRACY_LOG_ARG_IF(destY)
        __TRACY_LOG_ARG_IF(destStride)
        __TRACY_LOG_ARG_IF(syncObject)
        __TRACY_LOG_ARG_IF(syncFlags)
        __TRACY_LOG_ARG_IF(notification)
    }
#endif // TRACY_ENABLE

    if (!srcAddress || !destAddress)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    uint32_t cmd_timestamp;
    if (syncObject) {
        SceGxmSyncObject *sync = syncObject.get(emuenv.mem);
        renderer::send_single_command(*emuenv.renderer, nullptr, renderer::CommandOpcode::WaitSyncObject, false,
            syncObject, sync->last_display.load());

        cmd_timestamp = ++sync->timestamp_ahead;
        sync->last_operation_global = emuenv.gxm.global_timestamp.fetch_add(1, std::memory_order_relaxed);
    }

    SceGxmTransferImage *src = new SceGxmTransferImage;
    src->format = srcFormat;
    src->address = srcAddress;
    src->x = srcX;
    src->y = srcY;
    src->width = srcWidth;
    src->height = srcHeight;
    src->stride = srcStride;

    SceGxmTransferImage *dest = new SceGxmTransferImage;
    dest->format = destFormat;
    dest->address = destAddress;
    dest->x = destX;
    dest->y = destY;
    dest->width = srcWidth / 2;
    dest->height = srcHeight / 2;
    dest->stride = destStride;

    renderer::transfer_downscale(*emuenv.renderer, src, dest);

    if (notification)
        renderer::send_single_command(*emuenv.renderer, nullptr, renderer::CommandOpcode::SignalNotification, false, *notification);

    if (syncObject) {
        renderer::send_single_command(*emuenv.renderer, nullptr, renderer::CommandOpcode::SignalSyncObject, false,
            syncObject, cmd_timestamp);
    }

    return 0;
}

EXPORT(int, sceGxmTransferFill, uint32_t fillColor, SceGxmTransferFormat destFormat, Ptr<void> destAddress,
    uint32_t destX, uint32_t destY, uint32_t destWidth, uint32_t destHeight, int32_t destStride,
    Ptr<SceGxmSyncObject> syncObject, SceGxmTransferFlags syncFlags, const SceGxmNotification *notification) {
    TRACY_FUNC(sceGxmTransferFill, fillColor, destFormat, destAddress, destX, destY, destWidth, destHeight, destStride, syncObject);
#ifdef TRACY_ENABLE
    if (_tracy_activation_state) {
        __TRACY_LOG_ARG_IF(syncFlags)
        __TRACY_LOG_ARG_IF(notification)
    }
#endif // TRACY_ENABLE

    if (!destAddress)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    uint32_t cmd_timestamp;
    if (syncObject) {
        SceGxmSyncObject *sync = syncObject.get(emuenv.mem);
        renderer::send_single_command(*emuenv.renderer, nullptr, renderer::CommandOpcode::WaitSyncObject, false,
            syncObject, sync->last_display.load());

        cmd_timestamp = ++sync->timestamp_ahead;
        sync->last_operation_global = emuenv.gxm.global_timestamp.fetch_add(1, std::memory_order_relaxed);
    }

    SceGxmTransferImage *dest = new SceGxmTransferImage;
    dest->format = destFormat;
    dest->address = destAddress;
    dest->x = destX;
    dest->y = destY;
    dest->width = destWidth;
    dest->height = destHeight;
    dest->stride = destStride;
    renderer::transfer_fill(*emuenv.renderer, fillColor, dest);

    if (notification)
        renderer::send_single_command(*emuenv.renderer, nullptr, renderer::CommandOpcode::SignalNotification, false, *notification);

    if (syncObject) {
        renderer::send_single_command(*emuenv.renderer, nullptr, renderer::CommandOpcode::SignalSyncObject, false,
            syncObject, cmd_timestamp);
    }

    return 0;
}

// Finishes outstanding transfer work by calling renderer::finish without a
// context. Always returns 0.
EXPORT(int, sceGxmTransferFinish) {
    TRACY_FUNC(sceGxmTransferFinish);

    // Handled the same way as sceGxmFinish.
    auto &gpu = *emuenv.renderer;
    renderer::finish(gpu, nullptr);

    return 0;
}

// Validates the pointer only; no unmapping work is performed here.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null base, 0 otherwise.
EXPORT(int, sceGxmUnmapFragmentUsseMemory, void *base) {
    TRACY_FUNC(sceGxmUnmapFragmentUsseMemory, base);

    if (base == nullptr)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    return 0;
}

EXPORT(int, sceGxmUnmapMemory, Ptr<void> base) {
    TRACY_FUNC(sceGxmUnmapMemory, base);
    if (!base) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (base.address() % KiB(4) != 0)
        LOG_WARN_ONCE("Unmapping unaligned GPU memory");

    // Make sure the base address are 4KiB-aligned
    Address aligned_base = align_down(base.address(), KiB(4));

    auto ite = emuenv.gxm.memory_mapped_regions.find(aligned_base);
    if (ite == emuenv.gxm.memory_mapped_regions.end()) {
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);
    }

    if (emuenv.renderer->features.support_memory_mapping && ite->second.size > 0)
        renderer::send_single_command(*emuenv.renderer, nullptr, renderer::CommandOpcode::MemoryUnmap, true, aligned_base);

    emuenv.gxm.memory_mapped_regions.erase(ite);
    return 0;
}

// Validates the pointer only; no unmapping work is performed here.
// Returns SCE_GXM_ERROR_INVALID_POINTER for a null base, 0 otherwise.
EXPORT(int, sceGxmUnmapVertexUsseMemory, void *base) {
    TRACY_FUNC(sceGxmUnmapVertexUsseMemory, base);

    if (base == nullptr)
        return RET_ERROR(SCE_GXM_ERROR_INVALID_POINTER);

    return 0;
}

// Implemented by delegating to sceGxmMidSceneFlush on the same context with
// zero flags and no further arguments; returns whatever that call returns.
EXPORT(int, sceGxmVertexFence, SceGxmContext *immediateContext) {
    TRACY_FUNC(sceGxmVertexFence, immediateContext);

    const int flush_result = CALL_EXPORT(sceGxmMidSceneFlush, immediateContext, 0, nullptr, nullptr);
    return flush_result;
}

// Returns the program pointer stored in the given vertex program.
// vertexProgram is dereferenced without a null check.
EXPORT(Ptr<const SceGxmProgram>, sceGxmVertexProgramGetProgram, const SceGxmVertexProgram *vertexProgram) {
    TRACY_FUNC(sceGxmVertexProgramGetProgram, vertexProgram);

    const auto &stored_program = vertexProgram->program;
    return stored_program;
}

// Not implemented: returns the result of the UNIMPLEMENTED() macro.
EXPORT(int, sceGxmWaitEvent) {
    TRACY_FUNC(sceGxmWaitEvent);

    const int stub_result = UNIMPLEMENTED();
    return stub_result;
}
